diff options
-rw-r--r-- | recipes/linux/linux-2.6.24/hipox/hipox-ubifs.patch | 43970 | ||||
-rw-r--r-- | recipes/linux/linux-2.6.24/ubifs-v2.6.24.patch | 45442 | ||||
-rw-r--r-- | recipes/linux/linux_2.6.24.bb | 3 |
3 files changed, 45447 insertions, 43968 deletions
diff --git a/recipes/linux/linux-2.6.24/hipox/hipox-ubifs.patch b/recipes/linux/linux-2.6.24/hipox/hipox-ubifs.patch index 2aef97d4eb..670f22518c 100644 --- a/recipes/linux/linux-2.6.24/hipox/hipox-ubifs.patch +++ b/recipes/linux/linux-2.6.24/hipox/hipox-ubifs.patch @@ -1,43974 +1,10 @@ -diff -Nurd linux-2.6.24.orig/crypto/Kconfig linux-2.6.24/crypto/Kconfig ---- linux-2.6.24.orig/crypto/Kconfig 2009-04-17 09:45:12.000000000 +0200 -+++ linux-2.6.24/crypto/Kconfig 2009-04-17 09:49:26.000000000 +0200 -@@ -502,6 +502,14 @@ - Authenc: Combined mode wrapper for IPsec. - This is required for IPSec. - -+config CRYPTO_LZO -+ tristate "LZO compression algorithm" -+ select CRYPTO_ALGAPI -+ select LZO_COMPRESS -+ select LZO_DECOMPRESS -+ help -+ This is the LZO algorithm. -+ - source "drivers/crypto/Kconfig" - - endif # if CRYPTO -diff -Nurd linux-2.6.24.orig/crypto/Makefile linux-2.6.24/crypto/Makefile ---- linux-2.6.24.orig/crypto/Makefile 2009-04-17 09:45:12.000000000 +0200 -+++ linux-2.6.24/crypto/Makefile 2009-04-17 09:49:26.000000000 +0200 -@@ -51,6 +51,7 @@ - obj-$(CONFIG_CRYPTO_DEFLATE) += deflate.o - obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o - obj-$(CONFIG_CRYPTO_CRC32C) += crc32c.o -+obj-$(CONFIG_CRYPTO_LZO) += lzo.o - obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o - - obj-$(CONFIG_CRYPTO_TEST) += tcrypt.o -diff -Nurd linux-2.6.24.orig/crypto/lzo.c linux-2.6.24/crypto/lzo.c ---- linux-2.6.24.orig/crypto/lzo.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.24/crypto/lzo.c 2009-04-17 09:49:26.000000000 +0200 -@@ -0,0 +1,106 @@ -+/* -+ * Cryptographic API. -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 as published by -+ * the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for -+ * more details. -+ * -+ * You should have received a copy of the GNU General Public License along with -+ * this program; if not, write to the Free Software Foundation, Inc., 51 -+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ * -+ */ -+ -+#include <linux/init.h> -+#include <linux/module.h> -+#include <linux/crypto.h> -+#include <linux/vmalloc.h> -+#include <linux/lzo.h> -+ -+struct lzo_ctx { -+ void *lzo_comp_mem; -+}; -+ -+static int lzo_init(struct crypto_tfm *tfm) -+{ -+ struct lzo_ctx *ctx = crypto_tfm_ctx(tfm); -+ -+ ctx->lzo_comp_mem = vmalloc(LZO1X_MEM_COMPRESS); -+ if (!ctx->lzo_comp_mem) -+ return -ENOMEM; -+ -+ return 0; -+} -+ -+static void lzo_exit(struct crypto_tfm *tfm) -+{ -+ struct lzo_ctx *ctx = crypto_tfm_ctx(tfm); -+ -+ vfree(ctx->lzo_comp_mem); -+} -+ -+static int lzo_compress(struct crypto_tfm *tfm, const u8 *src, -+ unsigned int slen, u8 *dst, unsigned int *dlen) -+{ -+ struct lzo_ctx *ctx = crypto_tfm_ctx(tfm); -+ size_t tmp_len = *dlen; /* size_t(ulong) <-> uint on 64 bit */ -+ int err; -+ -+ err = lzo1x_1_compress(src, slen, dst, &tmp_len, ctx->lzo_comp_mem); -+ -+ if (err != LZO_E_OK) -+ return -EINVAL; -+ -+ *dlen = tmp_len; -+ return 0; -+} -+ -+static int lzo_decompress(struct crypto_tfm *tfm, const u8 *src, -+ unsigned int slen, u8 *dst, unsigned int *dlen) -+{ -+ int err; -+ size_t tmp_len = *dlen; /* size_t(ulong) <-> uint on 64 bit */ -+ -+ err = lzo1x_decompress_safe(src, slen, dst, &tmp_len); -+ -+ if (err != LZO_E_OK) -+ return -EINVAL; -+ -+ *dlen = tmp_len; -+ return 0; -+ -+} -+ -+static struct crypto_alg alg = { -+ .cra_name = "lzo", -+ .cra_flags = CRYPTO_ALG_TYPE_COMPRESS, -+ .cra_ctxsize = sizeof(struct lzo_ctx), -+ .cra_module = THIS_MODULE, -+ .cra_list = LIST_HEAD_INIT(alg.cra_list), -+ .cra_init = lzo_init, -+ .cra_exit = lzo_exit, -+ .cra_u = { .compress = { -+ .coa_compress = lzo_compress, -+ .coa_decompress = lzo_decompress } } -+}; -+ -+static int __init 
init(void) -+{ -+ return crypto_register_alg(&alg); -+} -+ -+static void __exit fini(void) -+{ -+ crypto_unregister_alg(&alg); -+} -+ -+module_init(init); -+module_exit(fini); -+ -+MODULE_LICENSE("GPL"); -+MODULE_DESCRIPTION("LZO Compression Algorithm"); -diff -Nurd linux-2.6.24.orig/drivers/mtd/ubi/Kconfig linux-2.6.24/drivers/mtd/ubi/Kconfig ---- linux-2.6.24.orig/drivers/mtd/ubi/Kconfig 2009-04-17 09:45:11.000000000 +0200 -+++ linux-2.6.24/drivers/mtd/ubi/Kconfig 2009-04-17 09:49:26.000000000 +0200 -@@ -24,8 +24,13 @@ - erase counter value and the lowest erase counter value of eraseblocks - of UBI devices. When this threshold is exceeded, UBI starts performing - wear leveling by means of moving data from eraseblock with low erase -- counter to eraseblocks with high erase counter. Leave the default -- value if unsure. -+ counter to eraseblocks with high erase counter. -+ -+ The default value should be OK for SLC NAND flashes, NOR flashes and -+ other flashes which have eraseblock life-cycle 100000 or more. -+ However, in case of MLC NAND flashes which typically have eraseblock -+ life-cycle less then 10000, the threshold should be lessened (e.g., -+ to 128 or 256, although it does not have to be power of 2). - - config MTD_UBI_BEB_RESERVE - int "Percentage of reserved eraseblocks for bad eraseblocks handling" -diff -Nurd linux-2.6.24.orig/drivers/mtd/ubi/build.c linux-2.6.24/drivers/mtd/ubi/build.c ---- linux-2.6.24.orig/drivers/mtd/ubi/build.c 2009-04-17 09:45:11.000000000 +0200 -+++ linux-2.6.24/drivers/mtd/ubi/build.c 2009-04-17 09:49:26.000000000 +0200 -@@ -21,11 +21,16 @@ - */ - - /* -- * This file includes UBI initialization and building of UBI devices. At the -- * moment UBI devices may only be added while UBI is initialized, but dynamic -- * device add/remove functionality is planned. Also, at the moment we only -- * attach UBI devices by scanning, which will become a bottleneck when flashes -- * reach certain large size. 
Then one may improve UBI and add other methods. -+ * This file includes UBI initialization and building of UBI devices. -+ * -+ * When UBI is initialized, it attaches all the MTD devices specified as the -+ * module load parameters or the kernel boot parameters. If MTD devices were -+ * specified, UBI does not attach any MTD device, but it is possible to do -+ * later using the "UBI control device". -+ * -+ * At the moment we only attach UBI devices by scanning, which will become a -+ * bottleneck when flashes reach certain large size. Then one may improve UBI -+ * and add other methods, although it does not seem to be easy to do. - */ - - #include <linux/err.h> -@@ -33,7 +38,9 @@ - #include <linux/moduleparam.h> - #include <linux/stringify.h> - #include <linux/stat.h> -+#include <linux/miscdevice.h> - #include <linux/log2.h> -+#include <linux/kthread.h> - #include "ubi.h" - - /* Maximum length of the 'mtd=' parameter */ -@@ -43,29 +50,39 @@ - * struct mtd_dev_param - MTD device parameter description data structure. 
- * @name: MTD device name or number string - * @vid_hdr_offs: VID header offset -- * @data_offs: data offset - */ --struct mtd_dev_param --{ -+struct mtd_dev_param { - char name[MTD_PARAM_LEN_MAX]; - int vid_hdr_offs; -- int data_offs; - }; - - /* Numbers of elements set in the @mtd_dev_param array */ --static int mtd_devs = 0; -+static int mtd_devs; - - /* MTD devices specification parameters */ - static struct mtd_dev_param mtd_dev_param[UBI_MAX_DEVICES]; - --/* Number of UBI devices in system */ --int ubi_devices_cnt; -+/* Root UBI "class" object (corresponds to '/<sysfs>/class/ubi/') */ -+struct class *ubi_class; -+ -+/* Slab cache for wear-leveling entries */ -+struct kmem_cache *ubi_wl_entry_slab; -+ -+/* UBI control character device */ -+static struct miscdevice ubi_ctrl_cdev = { -+ .minor = MISC_DYNAMIC_MINOR, -+ .name = "ubi_ctrl", -+ .fops = &ubi_ctrl_cdev_operations, -+}; - - /* All UBI devices in system */ --struct ubi_device *ubi_devices[UBI_MAX_DEVICES]; -+static struct ubi_device *ubi_devices[UBI_MAX_DEVICES]; - --/* Root UBI "class" object (corresponds to '/<sysfs>/class/ubi/') */ --struct class *ubi_class; -+/* Serializes UBI devices creations and removals */ -+DEFINE_MUTEX(ubi_devices_mutex); -+ -+/* Protects @ubi_devices and @ubi->ref_count */ -+static DEFINE_SPINLOCK(ubi_devices_lock); - - /* "Show" method for files in '/<sysfs>/class/ubi/' */ - static ssize_t ubi_version_show(struct class *class, char *buf) -@@ -101,42 +118,157 @@ - __ATTR(min_io_size, S_IRUGO, dev_attribute_show, NULL); - static struct device_attribute dev_bgt_enabled = - __ATTR(bgt_enabled, S_IRUGO, dev_attribute_show, NULL); -+static struct device_attribute dev_mtd_num = -+ __ATTR(mtd_num, S_IRUGO, dev_attribute_show, NULL); -+ -+/** -+ * ubi_get_device - get UBI device. -+ * @ubi_num: UBI device number -+ * -+ * This function returns UBI device description object for UBI device number -+ * @ubi_num, or %NULL if the device does not exist. 
This function increases the -+ * device reference count to prevent removal of the device. In other words, the -+ * device cannot be removed if its reference count is not zero. -+ */ -+struct ubi_device *ubi_get_device(int ubi_num) -+{ -+ struct ubi_device *ubi; -+ -+ spin_lock(&ubi_devices_lock); -+ ubi = ubi_devices[ubi_num]; -+ if (ubi) { -+ ubi_assert(ubi->ref_count >= 0); -+ ubi->ref_count += 1; -+ get_device(&ubi->dev); -+ } -+ spin_unlock(&ubi_devices_lock); -+ -+ return ubi; -+} -+ -+/** -+ * ubi_put_device - drop an UBI device reference. -+ * @ubi: UBI device description object -+ */ -+void ubi_put_device(struct ubi_device *ubi) -+{ -+ spin_lock(&ubi_devices_lock); -+ ubi->ref_count -= 1; -+ put_device(&ubi->dev); -+ spin_unlock(&ubi_devices_lock); -+} -+ -+/** -+ * ubi_get_by_major - get UBI device by character device major number. -+ * @major: major number -+ * -+ * This function is similar to 'ubi_get_device()', but it searches the device -+ * by its major number. -+ */ -+struct ubi_device *ubi_get_by_major(int major) -+{ -+ int i; -+ struct ubi_device *ubi; -+ -+ spin_lock(&ubi_devices_lock); -+ for (i = 0; i < UBI_MAX_DEVICES; i++) { -+ ubi = ubi_devices[i]; -+ if (ubi && MAJOR(ubi->cdev.dev) == major) { -+ ubi_assert(ubi->ref_count >= 0); -+ ubi->ref_count += 1; -+ get_device(&ubi->dev); -+ spin_unlock(&ubi_devices_lock); -+ return ubi; -+ } -+ } -+ spin_unlock(&ubi_devices_lock); -+ -+ return NULL; -+} -+ -+/** -+ * ubi_major2num - get UBI device number by character device major number. -+ * @major: major number -+ * -+ * This function searches UBI device number object by its major number. If UBI -+ * device was not found, this function returns -ENODEV, otherwise the UBI device -+ * number is returned. 
-+ */ -+int ubi_major2num(int major) -+{ -+ int i, ubi_num = -ENODEV; -+ -+ spin_lock(&ubi_devices_lock); -+ for (i = 0; i < UBI_MAX_DEVICES; i++) { -+ struct ubi_device *ubi = ubi_devices[i]; -+ -+ if (ubi && MAJOR(ubi->cdev.dev) == major) { -+ ubi_num = ubi->ubi_num; -+ break; -+ } -+ } -+ spin_unlock(&ubi_devices_lock); -+ -+ return ubi_num; -+} - - /* "Show" method for files in '/<sysfs>/class/ubi/ubiX/' */ - static ssize_t dev_attribute_show(struct device *dev, - struct device_attribute *attr, char *buf) - { -- const struct ubi_device *ubi; -+ ssize_t ret; -+ struct ubi_device *ubi; - -+ /* -+ * The below code looks weird, but it actually makes sense. We get the -+ * UBI device reference from the contained 'struct ubi_device'. But it -+ * is unclear if the device was removed or not yet. Indeed, if the -+ * device was removed before we increased its reference count, -+ * 'ubi_get_device()' will return -ENODEV and we fail. -+ * -+ * Remember, 'struct ubi_device' is freed in the release function, so -+ * we still can use 'ubi->ubi_num'. 
-+ */ - ubi = container_of(dev, struct ubi_device, dev); -+ ubi = ubi_get_device(ubi->ubi_num); -+ if (!ubi) -+ return -ENODEV; -+ - if (attr == &dev_eraseblock_size) -- return sprintf(buf, "%d\n", ubi->leb_size); -+ ret = sprintf(buf, "%d\n", ubi->leb_size); - else if (attr == &dev_avail_eraseblocks) -- return sprintf(buf, "%d\n", ubi->avail_pebs); -+ ret = sprintf(buf, "%d\n", ubi->avail_pebs); - else if (attr == &dev_total_eraseblocks) -- return sprintf(buf, "%d\n", ubi->good_peb_count); -+ ret = sprintf(buf, "%d\n", ubi->good_peb_count); - else if (attr == &dev_volumes_count) -- return sprintf(buf, "%d\n", ubi->vol_count); -+ ret = sprintf(buf, "%d\n", ubi->vol_count - UBI_INT_VOL_COUNT); - else if (attr == &dev_max_ec) -- return sprintf(buf, "%d\n", ubi->max_ec); -+ ret = sprintf(buf, "%d\n", ubi->max_ec); - else if (attr == &dev_reserved_for_bad) -- return sprintf(buf, "%d\n", ubi->beb_rsvd_pebs); -+ ret = sprintf(buf, "%d\n", ubi->beb_rsvd_pebs); - else if (attr == &dev_bad_peb_count) -- return sprintf(buf, "%d\n", ubi->bad_peb_count); -+ ret = sprintf(buf, "%d\n", ubi->bad_peb_count); - else if (attr == &dev_max_vol_count) -- return sprintf(buf, "%d\n", ubi->vtbl_slots); -+ ret = sprintf(buf, "%d\n", ubi->vtbl_slots); - else if (attr == &dev_min_io_size) -- return sprintf(buf, "%d\n", ubi->min_io_size); -+ ret = sprintf(buf, "%d\n", ubi->min_io_size); - else if (attr == &dev_bgt_enabled) -- return sprintf(buf, "%d\n", ubi->thread_enabled); -+ ret = sprintf(buf, "%d\n", ubi->thread_enabled); -+ else if (attr == &dev_mtd_num) -+ ret = sprintf(buf, "%d\n", ubi->mtd->index); - else -- BUG(); -+ ret = -EINVAL; - -- return 0; -+ ubi_put_device(ubi); -+ return ret; - } - --/* Fake "release" method for UBI devices */ --static void dev_release(struct device *dev) { } -+static void dev_release(struct device *dev) -+{ -+ struct ubi_device *ubi = container_of(dev, struct ubi_device, dev); -+ -+ kfree(ubi); -+} - - /** - * ubi_sysfs_init - initialize sysfs for an UBI 
device. -@@ -150,68 +282,44 @@ - int err; - - ubi->dev.release = dev_release; -- ubi->dev.devt = MKDEV(ubi->major, 0); -+ ubi->dev.devt = ubi->cdev.dev; - ubi->dev.class = ubi_class; - sprintf(&ubi->dev.bus_id[0], UBI_NAME_STR"%d", ubi->ubi_num); - err = device_register(&ubi->dev); - if (err) -- goto out; -+ return err; - - err = device_create_file(&ubi->dev, &dev_eraseblock_size); - if (err) -- goto out_unregister; -+ return err; - err = device_create_file(&ubi->dev, &dev_avail_eraseblocks); - if (err) -- goto out_eraseblock_size; -+ return err; - err = device_create_file(&ubi->dev, &dev_total_eraseblocks); - if (err) -- goto out_avail_eraseblocks; -+ return err; - err = device_create_file(&ubi->dev, &dev_volumes_count); - if (err) -- goto out_total_eraseblocks; -+ return err; - err = device_create_file(&ubi->dev, &dev_max_ec); - if (err) -- goto out_volumes_count; -+ return err; - err = device_create_file(&ubi->dev, &dev_reserved_for_bad); - if (err) -- goto out_volumes_max_ec; -+ return err; - err = device_create_file(&ubi->dev, &dev_bad_peb_count); - if (err) -- goto out_reserved_for_bad; -+ return err; - err = device_create_file(&ubi->dev, &dev_max_vol_count); - if (err) -- goto out_bad_peb_count; -+ return err; - err = device_create_file(&ubi->dev, &dev_min_io_size); - if (err) -- goto out_max_vol_count; -+ return err; - err = device_create_file(&ubi->dev, &dev_bgt_enabled); - if (err) -- goto out_min_io_size; -- -- return 0; -- --out_min_io_size: -- device_remove_file(&ubi->dev, &dev_min_io_size); --out_max_vol_count: -- device_remove_file(&ubi->dev, &dev_max_vol_count); --out_bad_peb_count: -- device_remove_file(&ubi->dev, &dev_bad_peb_count); --out_reserved_for_bad: -- device_remove_file(&ubi->dev, &dev_reserved_for_bad); --out_volumes_max_ec: -- device_remove_file(&ubi->dev, &dev_max_ec); --out_volumes_count: -- device_remove_file(&ubi->dev, &dev_volumes_count); --out_total_eraseblocks: -- device_remove_file(&ubi->dev, &dev_total_eraseblocks); 
--out_avail_eraseblocks: -- device_remove_file(&ubi->dev, &dev_avail_eraseblocks); --out_eraseblock_size: -- device_remove_file(&ubi->dev, &dev_eraseblock_size); --out_unregister: -- device_unregister(&ubi->dev); --out: -- ubi_err("failed to initialize sysfs for %s", ubi->ubi_name); -+ return err; -+ err = device_create_file(&ubi->dev, &dev_mtd_num); - return err; - } - -@@ -221,6 +329,7 @@ - */ - static void ubi_sysfs_close(struct ubi_device *ubi) - { -+ device_remove_file(&ubi->dev, &dev_mtd_num); - device_remove_file(&ubi->dev, &dev_bgt_enabled); - device_remove_file(&ubi->dev, &dev_min_io_size); - device_remove_file(&ubi->dev, &dev_max_vol_count); -@@ -244,7 +353,26 @@ - - for (i = 0; i < ubi->vtbl_slots; i++) - if (ubi->volumes[i]) -- ubi_free_volume(ubi, i); -+ ubi_free_volume(ubi, ubi->volumes[i]); -+} -+ -+/** -+ * free_user_volumes - free all user volumes. -+ * @ubi: UBI device description object -+ * -+ * Normally the volumes are freed at the release function of the volume device -+ * objects. However, on error paths the volumes have to be freed before the -+ * device objects have been initialized. -+ */ -+static void free_user_volumes(struct ubi_device *ubi) -+{ -+ int i; -+ -+ for (i = 0; i < ubi->vtbl_slots; i++) -+ if (ubi->volumes[i]) { -+ kfree(ubi->volumes[i]->eba_tbl); -+ kfree(ubi->volumes[i]); -+ } - } - - /** -@@ -252,16 +380,13 @@ - * @ubi: UBI device description object - * - * This function returns zero in case of success and a negative error code in -- * case of failure. -+ * case of failure. Note, this function destroys all volumes if it failes. 
- */ - static int uif_init(struct ubi_device *ubi) - { - int i, err; - dev_t dev; - -- mutex_init(&ubi->vtbl_mutex); -- spin_lock_init(&ubi->volumes_lock); -- - sprintf(ubi->ubi_name, UBI_NAME_STR "%d", ubi->ubi_num); - - /* -@@ -278,52 +403,72 @@ - return err; - } - -+ ubi_assert(MINOR(dev) == 0); - cdev_init(&ubi->cdev, &ubi_cdev_operations); -- ubi->major = MAJOR(dev); -- dbg_msg("%s major is %u", ubi->ubi_name, ubi->major); -+ dbg_gen("%s major is %u", ubi->ubi_name, MAJOR(dev)); - ubi->cdev.owner = THIS_MODULE; - -- dev = MKDEV(ubi->major, 0); - err = cdev_add(&ubi->cdev, dev, 1); - if (err) { -- ubi_err("cannot add character device %s", ubi->ubi_name); -+ ubi_err("cannot add character device"); - goto out_unreg; - } - - err = ubi_sysfs_init(ubi); - if (err) -- goto out_cdev; -+ goto out_sysfs; - - for (i = 0; i < ubi->vtbl_slots; i++) - if (ubi->volumes[i]) { -- err = ubi_add_volume(ubi, i); -- if (err) -+ err = ubi_add_volume(ubi, ubi->volumes[i]); -+ if (err) { -+ ubi_err("cannot add volume %d", i); - goto out_volumes; -+ } - } - - return 0; - - out_volumes: - kill_volumes(ubi); -+out_sysfs: - ubi_sysfs_close(ubi); --out_cdev: - cdev_del(&ubi->cdev); - out_unreg: -- unregister_chrdev_region(MKDEV(ubi->major, 0), -- ubi->vtbl_slots + 1); -+ unregister_chrdev_region(ubi->cdev.dev, ubi->vtbl_slots + 1); -+ ubi_err("cannot initialize UBI %s, error %d", ubi->ubi_name, err); - return err; - } - - /** - * uif_close - close user interfaces for an UBI device. - * @ubi: UBI device description object -+ * -+ * Note, since this function un-registers UBI volume device objects (@vol->dev), -+ * the memory allocated voe the volumes is freed as well (in the release -+ * function). 
- */ - static void uif_close(struct ubi_device *ubi) - { - kill_volumes(ubi); - ubi_sysfs_close(ubi); - cdev_del(&ubi->cdev); -- unregister_chrdev_region(MKDEV(ubi->major, 0), ubi->vtbl_slots + 1); -+ unregister_chrdev_region(ubi->cdev.dev, ubi->vtbl_slots + 1); -+} -+ -+/** -+ * free_internal_volumes - free internal volumes. -+ * @ubi: UBI device description object -+ */ -+static void free_internal_volumes(struct ubi_device *ubi) -+{ -+ int i; -+ -+ for (i = ubi->vtbl_slots; -+ i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) { -+ kfree(ubi->volumes[i]->eba_tbl); -+ kfree(ubi->volumes[i]); -+ } - } - - /** -@@ -370,6 +515,7 @@ - out_wl: - ubi_wl_close(ubi); - out_vtbl: -+ free_internal_volumes(ubi); - vfree(ubi->vtbl); - out_si: - ubi_scan_destroy_si(si); -@@ -377,16 +523,16 @@ - } - - /** -- * io_init - initialize I/O unit for a given UBI device. -+ * io_init - initialize I/O sub-system for a given UBI device. - * @ubi: UBI device description object - * - * If @ubi->vid_hdr_offset or @ubi->leb_start is zero, default offsets are - * assumed: - * o EC header is always at offset zero - this cannot be changed; - * o VID header starts just after the EC header at the closest address -- * aligned to @io->@hdrs_min_io_size; -+ * aligned to @io->hdrs_min_io_size; - * o data starts just after the VID header at the closest address aligned to -- * @io->@min_io_size -+ * @io->min_io_size - * - * This function returns zero in case of success and a negative error code in - * case of failure. -@@ -407,6 +553,9 @@ - return -EINVAL; - } - -+ if (ubi->vid_hdr_offset < 0) -+ return -EINVAL; -+ - /* - * Note, in this implementation we support MTD devices with 0x7FFFFFFF - * physical eraseblocks maximum. -@@ -422,9 +571,14 @@ - ubi->min_io_size = ubi->mtd->writesize; - ubi->hdrs_min_io_size = ubi->mtd->writesize >> ubi->mtd->subpage_sft; - -- /* Make sure minimal I/O unit is power of 2 */ -+ /* -+ * Make sure minimal I/O unit is power of 2. 
Note, there is no -+ * fundamental reason for this assumption. It is just an optimization -+ * which allows us to avoid costly division operations. -+ */ - if (!is_power_of_2(ubi->min_io_size)) { -- ubi_err("bad min. I/O unit"); -+ ubi_err("min. I/O unit (%d) is not power of 2", -+ ubi->min_io_size); - return -EINVAL; - } - -@@ -453,10 +607,8 @@ - } - - /* Similar for the data offset */ -- if (ubi->leb_start == 0) { -- ubi->leb_start = ubi->vid_hdr_offset + ubi->vid_hdr_alsize; -- ubi->leb_start = ALIGN(ubi->leb_start, ubi->min_io_size); -- } -+ ubi->leb_start = ubi->vid_hdr_offset + UBI_EC_HDR_SIZE; -+ ubi->leb_start = ALIGN(ubi->leb_start, ubi->min_io_size); - - dbg_msg("vid_hdr_offset %d", ubi->vid_hdr_offset); - dbg_msg("vid_hdr_aloffset %d", ubi->vid_hdr_aloffset); -@@ -474,7 +626,7 @@ - if (ubi->vid_hdr_offset < UBI_EC_HDR_SIZE || - ubi->leb_start < ubi->vid_hdr_offset + UBI_VID_HDR_SIZE || - ubi->leb_start > ubi->peb_size - UBI_VID_HDR_SIZE || -- ubi->leb_start % ubi->min_io_size) { -+ ubi->leb_start & (ubi->min_io_size - 1)) { - ubi_err("bad VID header (%d) or data offsets (%d)", - ubi->vid_hdr_offset, ubi->leb_start); - return -EINVAL; -@@ -499,8 +651,16 @@ - ubi->ro_mode = 1; - } - -- dbg_msg("leb_size %d", ubi->leb_size); -- dbg_msg("ro_mode %d", ubi->ro_mode); -+ ubi_msg("physical eraseblock size: %d bytes (%d KiB)", -+ ubi->peb_size, ubi->peb_size >> 10); -+ ubi_msg("logical eraseblock size: %d bytes", ubi->leb_size); -+ ubi_msg("smallest flash I/O unit: %d", ubi->min_io_size); -+ if (ubi->hdrs_min_io_size != ubi->min_io_size) -+ ubi_msg("sub-page size: %d", -+ ubi->hdrs_min_io_size); -+ ubi_msg("VID header offset: %d (aligned %d)", -+ ubi->vid_hdr_offset, ubi->vid_hdr_aloffset); -+ ubi_msg("data offset: %d", ubi->leb_start); - - /* - * Note, ideally, we have to initialize ubi->bad_peb_count here. But -@@ -514,89 +674,162 @@ - } - - /** -- * attach_mtd_dev - attach an MTD device. 
-- * @mtd_dev: MTD device name or number string -- * @vid_hdr_offset: VID header offset -- * @data_offset: data offset -+ * autoresize - re-size the volume which has the "auto-resize" flag set. -+ * @ubi: UBI device description object -+ * @vol_id: ID of the volume to re-size - * -- * This function attaches an MTD device to UBI. It first treats @mtd_dev as the -- * MTD device name, and tries to open it by this name. If it is unable to open, -- * it tries to convert @mtd_dev to an integer and open the MTD device by its -- * number. Returns zero in case of success and a negative error code in case of -- * failure. -+ * This function re-sizes the volume marked by the @UBI_VTBL_AUTORESIZE_FLG in -+ * the volume table to the largest possible size. See comments in ubi-header.h -+ * for more description of the flag. Returns zero in case of success and a -+ * negative error code in case of failure. - */ --static int attach_mtd_dev(const char *mtd_dev, int vid_hdr_offset, -- int data_offset) -+static int autoresize(struct ubi_device *ubi, int vol_id) - { -- struct ubi_device *ubi; -- struct mtd_info *mtd; -- int i, err; -+ struct ubi_volume_desc desc; -+ struct ubi_volume *vol = ubi->volumes[vol_id]; -+ int err, old_reserved_pebs = vol->reserved_pebs; - -- mtd = get_mtd_device_nm(mtd_dev); -- if (IS_ERR(mtd)) { -- int mtd_num; -- char *endp; -+ /* -+ * Clear the auto-resize flag in the volume in-memory copy of the -+ * volume table, and 'ubi_resize_volume()' will propagate this change -+ * to the flash. -+ */ -+ ubi->vtbl[vol_id].flags &= ~UBI_VTBL_AUTORESIZE_FLG; - -- if (PTR_ERR(mtd) != -ENODEV) -- return PTR_ERR(mtd); -+ if (ubi->avail_pebs == 0) { -+ struct ubi_vtbl_record vtbl_rec; - - /* -- * Probably this is not MTD device name but MTD device number - -- * check this out. -+ * No available PEBs to re-size the volume, clear the flag on -+ * flash and exit. 
- */ -- mtd_num = simple_strtoul(mtd_dev, &endp, 0); -- if (*endp != '\0' || mtd_dev == endp) { -- ubi_err("incorrect MTD device: \"%s\"", mtd_dev); -- return -ENODEV; -+ memcpy(&vtbl_rec, &ubi->vtbl[vol_id], -+ sizeof(struct ubi_vtbl_record)); -+ err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec); -+ if (err) -+ ubi_err("cannot clean auto-resize flag for volume %d", -+ vol_id); -+ } else { -+ desc.vol = vol; -+ err = ubi_resize_volume(&desc, -+ old_reserved_pebs + ubi->avail_pebs); -+ if (err) -+ ubi_err("cannot auto-resize volume %d", vol_id); -+ } -+ -+ if (err) -+ return err; -+ -+ ubi_msg("volume %d (\"%s\") re-sized from %d to %d LEBs", vol_id, -+ vol->name, old_reserved_pebs, vol->reserved_pebs); -+ return 0; -+} -+ -+/** -+ * ubi_attach_mtd_dev - attach an MTD device. -+ * @mtd: MTD device description object -+ * @ubi_num: number to assign to the new UBI device -+ * @vid_hdr_offset: VID header offset -+ * -+ * This function attaches MTD device @mtd_dev to UBI and assign @ubi_num number -+ * to the newly created UBI device, unless @ubi_num is %UBI_DEV_NUM_AUTO, in -+ * which case this function finds a vacant device number and assigns it -+ * automatically. Returns the new UBI device number in case of success and a -+ * negative error code in case of failure. -+ * -+ * Note, the invocations of this function has to be serialized by the -+ * @ubi_devices_mutex. -+ */ -+int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset) -+{ -+ struct ubi_device *ubi; -+ int i, err, do_free = 1; -+ -+ /* -+ * Check if we already have the same MTD device attached. -+ * -+ * Note, this function assumes that UBI devices creations and deletions -+ * are serialized, so it does not take the &ubi_devices_lock. 
-+ */ -+ for (i = 0; i < UBI_MAX_DEVICES; i++) { -+ ubi = ubi_devices[i]; -+ if (ubi && mtd->index == ubi->mtd->index) { -+ dbg_err("mtd%d is already attached to ubi%d", -+ mtd->index, i); -+ return -EEXIST; - } -+ } - -- mtd = get_mtd_device(NULL, mtd_num); -- if (IS_ERR(mtd)) -- return PTR_ERR(mtd); -+ /* -+ * Make sure this MTD device is not emulated on top of an UBI volume -+ * already. Well, generally this recursion works fine, but there are -+ * different problems like the UBI module takes a reference to itself -+ * by attaching (and thus, opening) the emulated MTD device. This -+ * results in inability to unload the module. And in general it makes -+ * no sense to attach emulated MTD devices, so we prohibit this. -+ */ -+ if (mtd->type == MTD_UBIVOLUME) { -+ ubi_err("refuse attaching mtd%d - it is already emulated on " -+ "top of UBI", mtd->index); -+ return -EINVAL; - } - -- /* Check if we already have the same MTD device attached */ -- for (i = 0; i < ubi_devices_cnt; i++) -- if (ubi_devices[i]->mtd->index == mtd->index) { -- ubi_err("mtd%d is already attached to ubi%d", -- mtd->index, i); -- err = -EINVAL; -- goto out_mtd; -+ if (ubi_num == UBI_DEV_NUM_AUTO) { -+ /* Search for an empty slot in the @ubi_devices array */ -+ for (ubi_num = 0; ubi_num < UBI_MAX_DEVICES; ubi_num++) -+ if (!ubi_devices[ubi_num]) -+ break; -+ if (ubi_num == UBI_MAX_DEVICES) { -+ dbg_err("only %d UBI devices may be created", -+ UBI_MAX_DEVICES); -+ return -ENFILE; - } -+ } else { -+ if (ubi_num >= UBI_MAX_DEVICES) -+ return -EINVAL; - -- ubi = ubi_devices[ubi_devices_cnt] = kzalloc(sizeof(struct ubi_device), -- GFP_KERNEL); -- if (!ubi) { -- err = -ENOMEM; -- goto out_mtd; -+ /* Make sure ubi_num is not busy */ -+ if (ubi_devices[ubi_num]) { -+ dbg_err("ubi%d already exists", ubi_num); -+ return -EEXIST; -+ } - } - -- ubi->ubi_num = ubi_devices_cnt; -+ ubi = kzalloc(sizeof(struct ubi_device), GFP_KERNEL); -+ if (!ubi) -+ return -ENOMEM; -+ - ubi->mtd = mtd; -+ ubi->ubi_num = 
ubi_num; -+ ubi->vid_hdr_offset = vid_hdr_offset; -+ ubi->autoresize_vol_id = -1; - -- dbg_msg("attaching mtd%d to ubi%d: VID header offset %d data offset %d", -- ubi->mtd->index, ubi_devices_cnt, vid_hdr_offset, data_offset); -+ mutex_init(&ubi->buf_mutex); -+ mutex_init(&ubi->ckvol_mutex); -+ mutex_init(&ubi->mult_mutex); -+ mutex_init(&ubi->volumes_mutex); -+ spin_lock_init(&ubi->volumes_lock); -+ -+ ubi_msg("attaching mtd%d to ubi%d", mtd->index, ubi_num); - -- ubi->vid_hdr_offset = vid_hdr_offset; -- ubi->leb_start = data_offset; - err = io_init(ubi); - if (err) - goto out_free; - -- mutex_init(&ubi->buf_mutex); -+ err = -ENOMEM; - ubi->peb_buf1 = vmalloc(ubi->peb_size); - if (!ubi->peb_buf1) - goto out_free; - - ubi->peb_buf2 = vmalloc(ubi->peb_size); - if (!ubi->peb_buf2) -- goto out_free; -+ goto out_free; - - #ifdef CONFIG_MTD_UBI_DEBUG - mutex_init(&ubi->dbg_buf_mutex); - ubi->dbg_peb_buf = vmalloc(ubi->peb_size); - if (!ubi->dbg_peb_buf) -- goto out_free; -+ goto out_free; - #endif - - err = attach_by_scanning(ubi); -@@ -605,22 +838,29 @@ - goto out_free; - } - -+ if (ubi->autoresize_vol_id != -1) { -+ err = autoresize(ubi, ubi->autoresize_vol_id); -+ if (err) -+ goto out_detach; -+ } -+ - err = uif_init(ubi); - if (err) -- goto out_detach; -+ goto out_nofree; - -- ubi_msg("attached mtd%d to ubi%d", ubi->mtd->index, ubi_devices_cnt); -- ubi_msg("MTD device name: \"%s\"", ubi->mtd->name); -+ ubi->bgt_thread = kthread_create(ubi_thread, ubi, ubi->bgt_name); -+ if (IS_ERR(ubi->bgt_thread)) { -+ err = PTR_ERR(ubi->bgt_thread); -+ ubi_err("cannot spawn \"%s\", error %d", ubi->bgt_name, -+ err); -+ goto out_uif; -+ } -+ -+ ubi_msg("attached mtd%d to ubi%d", mtd->index, ubi_num); -+ ubi_msg("MTD device name: \"%s\"", mtd->name); - ubi_msg("MTD device size: %llu MiB", ubi->flash_size >> 20); -- ubi_msg("physical eraseblock size: %d bytes (%d KiB)", -- ubi->peb_size, ubi->peb_size >> 10); -- ubi_msg("logical eraseblock size: %d bytes", ubi->leb_size); - 
ubi_msg("number of good PEBs: %d", ubi->good_peb_count); - ubi_msg("number of bad PEBs: %d", ubi->bad_peb_count); -- ubi_msg("smallest flash I/O unit: %d", ubi->min_io_size); -- ubi_msg("VID header offset: %d (aligned %d)", -- ubi->vid_hdr_offset, ubi->vid_hdr_aloffset); -- ubi_msg("data offset: %d", ubi->leb_start); - ubi_msg("max. allowed volumes: %d", ubi->vtbl_slots); - ubi_msg("wear-leveling threshold: %d", CONFIG_MTD_UBI_WL_THRESHOLD); - ubi_msg("number of internal volumes: %d", UBI_INT_VOL_COUNT); -@@ -632,18 +872,22 @@ - ubi->beb_rsvd_pebs); - ubi_msg("max/mean erase counter: %d/%d", ubi->max_ec, ubi->mean_ec); - -- /* Enable the background thread */ -- if (!DBG_DISABLE_BGT) { -+ if (!DBG_DISABLE_BGT) - ubi->thread_enabled = 1; -- wake_up_process(ubi->bgt_thread); -- } -+ wake_up_process(ubi->bgt_thread); - -- ubi_devices_cnt += 1; -- return 0; -+ ubi_devices[ubi_num] = ubi; -+ return ubi_num; - -+out_uif: -+ uif_close(ubi); -+out_nofree: -+ do_free = 0; - out_detach: -- ubi_eba_close(ubi); - ubi_wl_close(ubi); -+ if (do_free) -+ free_user_volumes(ubi); -+ free_internal_volumes(ubi); - vfree(ubi->vtbl); - out_free: - vfree(ubi->peb_buf1); -@@ -652,24 +896,67 @@ - vfree(ubi->dbg_peb_buf); - #endif - kfree(ubi); --out_mtd: -- put_mtd_device(mtd); -- ubi_devices[ubi_devices_cnt] = NULL; - return err; - } - - /** -- * detach_mtd_dev - detach an MTD device. -- * @ubi: UBI device description object -+ * ubi_detach_mtd_dev - detach an MTD device. -+ * @ubi_num: UBI device number to detach from -+ * @anyway: detach MTD even if device reference count is not zero -+ * -+ * This function destroys an UBI device number @ubi_num and detaches the -+ * underlying MTD device. Returns zero in case of success and %-EBUSY if the -+ * UBI device is busy and cannot be destroyed, and %-EINVAL if it does not -+ * exist. -+ * -+ * Note, the invocations of this function has to be serialized by the -+ * @ubi_devices_mutex. 
- */ --static void detach_mtd_dev(struct ubi_device *ubi) -+int ubi_detach_mtd_dev(int ubi_num, int anyway) - { -- int ubi_num = ubi->ubi_num, mtd_num = ubi->mtd->index; -+ struct ubi_device *ubi; - -+ if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES) -+ return -EINVAL; -+ -+ spin_lock(&ubi_devices_lock); -+ ubi = ubi_devices[ubi_num]; -+ if (!ubi) { -+ spin_unlock(&ubi_devices_lock); -+ return -EINVAL; -+ } -+ -+ if (ubi->ref_count) { -+ if (!anyway) { -+ spin_unlock(&ubi_devices_lock); -+ return -EBUSY; -+ } -+ /* This may only happen if there is a bug */ -+ ubi_err("%s reference count %d, destroy anyway", -+ ubi->ubi_name, ubi->ref_count); -+ } -+ ubi_devices[ubi_num] = NULL; -+ spin_unlock(&ubi_devices_lock); -+ -+ ubi_assert(ubi_num == ubi->ubi_num); - dbg_msg("detaching mtd%d from ubi%d", ubi->mtd->index, ubi_num); -+ -+ /* -+ * Before freeing anything, we have to stop the background thread to -+ * prevent it from doing anything on this device while we are freeing. -+ */ -+ if (ubi->bgt_thread) -+ kthread_stop(ubi->bgt_thread); -+ -+ /* -+ * Get a reference to the device in order to prevent 'dev_release()' -+ * from freeing @ubi object. -+ */ -+ get_device(&ubi->dev); -+ - uif_close(ubi); -- ubi_eba_close(ubi); - ubi_wl_close(ubi); -+ free_internal_volumes(ubi); - vfree(ubi->vtbl); - put_mtd_device(ubi->mtd); - vfree(ubi->peb_buf1); -@@ -677,11 +964,37 @@ - #ifdef CONFIG_MTD_UBI_DEBUG - vfree(ubi->dbg_peb_buf); - #endif -- kfree(ubi_devices[ubi_num]); -- ubi_devices[ubi_num] = NULL; -- ubi_devices_cnt -= 1; -- ubi_assert(ubi_devices_cnt >= 0); -- ubi_msg("mtd%d is detached from ubi%d", mtd_num, ubi_num); -+ ubi_msg("mtd%d is detached from ubi%d", ubi->mtd->index, ubi->ubi_num); -+ put_device(&ubi->dev); -+ return 0; -+} -+ -+/** -+ * find_mtd_device - open an MTD device by its name or number. 
-+ * @mtd_dev: name or number of the device -+ * -+ * This function tries to open and MTD device described by @mtd_dev string, -+ * which is first treated as an ASCII number, and if it is not true, it is -+ * treated as MTD device name. Returns MTD device description object in case of -+ * success and a negative error code in case of failure. -+ */ -+static struct mtd_info * __init open_mtd_device(const char *mtd_dev) -+{ -+ struct mtd_info *mtd; -+ int mtd_num; -+ char *endp; -+ -+ mtd_num = simple_strtoul(mtd_dev, &endp, 0); -+ if (*endp != '\0' || mtd_dev == endp) { -+ /* -+ * This does not look like an ASCII integer, probably this is -+ * MTD device name. -+ */ -+ mtd = get_mtd_device_nm(mtd_dev); -+ } else -+ mtd = get_mtd_device(NULL, mtd_num); -+ -+ return mtd; - } - - static int __init ubi_init(void) -@@ -693,55 +1006,101 @@ - BUILD_BUG_ON(sizeof(struct ubi_vid_hdr) != 64); - - if (mtd_devs > UBI_MAX_DEVICES) { -- printk("UBI error: too many MTD devices, maximum is %d\n", -- UBI_MAX_DEVICES); -+ ubi_err("too many MTD devices, maximum is %d", UBI_MAX_DEVICES); - return -EINVAL; - } - -+ /* Create base sysfs directory and sysfs files */ - ubi_class = class_create(THIS_MODULE, UBI_NAME_STR); -- if (IS_ERR(ubi_class)) -- return PTR_ERR(ubi_class); -+ if (IS_ERR(ubi_class)) { -+ err = PTR_ERR(ubi_class); -+ ubi_err("cannot create UBI class"); -+ goto out; -+ } - - err = class_create_file(ubi_class, &ubi_version); -- if (err) -+ if (err) { -+ ubi_err("cannot create sysfs file"); - goto out_class; -+ } -+ -+ err = misc_register(&ubi_ctrl_cdev); -+ if (err) { -+ ubi_err("cannot register device"); -+ goto out_version; -+ } -+ -+ ubi_wl_entry_slab = kmem_cache_create("ubi_wl_entry_slab", -+ sizeof(struct ubi_wl_entry), -+ 0, 0, NULL); -+ if (!ubi_wl_entry_slab) -+ goto out_dev_unreg; - - /* Attach MTD devices */ - for (i = 0; i < mtd_devs; i++) { - struct mtd_dev_param *p = &mtd_dev_param[i]; -+ struct mtd_info *mtd; - - cond_resched(); -- err = 
attach_mtd_dev(p->name, p->vid_hdr_offs, p->data_offs); -- if (err) -+ -+ mtd = open_mtd_device(p->name); -+ if (IS_ERR(mtd)) { -+ err = PTR_ERR(mtd); - goto out_detach; -+ } -+ -+ mutex_lock(&ubi_devices_mutex); -+ err = ubi_attach_mtd_dev(mtd, UBI_DEV_NUM_AUTO, -+ p->vid_hdr_offs); -+ mutex_unlock(&ubi_devices_mutex); -+ if (err < 0) { -+ put_mtd_device(mtd); -+ ubi_err("cannot attach mtd%d", mtd->index); -+ goto out_detach; -+ } - } - - return 0; - - out_detach: - for (k = 0; k < i; k++) -- detach_mtd_dev(ubi_devices[k]); -+ if (ubi_devices[k]) { -+ mutex_lock(&ubi_devices_mutex); -+ ubi_detach_mtd_dev(ubi_devices[k]->ubi_num, 1); -+ mutex_unlock(&ubi_devices_mutex); -+ } -+ kmem_cache_destroy(ubi_wl_entry_slab); -+out_dev_unreg: -+ misc_deregister(&ubi_ctrl_cdev); -+out_version: - class_remove_file(ubi_class, &ubi_version); - out_class: - class_destroy(ubi_class); -+out: -+ ubi_err("UBI error: cannot initialize UBI, error %d", err); - return err; - } - module_init(ubi_init); - - static void __exit ubi_exit(void) - { -- int i, n = ubi_devices_cnt; -+ int i; - -- for (i = 0; i < n; i++) -- detach_mtd_dev(ubi_devices[i]); -+ for (i = 0; i < UBI_MAX_DEVICES; i++) -+ if (ubi_devices[i]) { -+ mutex_lock(&ubi_devices_mutex); -+ ubi_detach_mtd_dev(ubi_devices[i]->ubi_num, 1); -+ mutex_unlock(&ubi_devices_mutex); -+ } -+ kmem_cache_destroy(ubi_wl_entry_slab); -+ misc_deregister(&ubi_ctrl_cdev); - class_remove_file(ubi_class, &ubi_version); - class_destroy(ubi_class); - } - module_exit(ubi_exit); - - /** -- * bytes_str_to_int - convert a string representing number of bytes to an -- * integer. -+ * bytes_str_to_int - convert a number of bytes string into an integer. 
- * @str: the string to convert - * - * This function returns positive resulting integer in case of success and a -@@ -754,7 +1113,8 @@ - - result = simple_strtoul(str, &endp, 0); - if (str == endp || result < 0) { -- printk("UBI error: incorrect bytes count: \"%s\"\n", str); -+ printk(KERN_ERR "UBI error: incorrect bytes count: \"%s\"\n", -+ str); - return -EINVAL; - } - -@@ -764,15 +1124,14 @@ - case 'M': - result *= 1024; - case 'K': -- case 'k': - result *= 1024; -- if (endp[1] == 'i' && (endp[2] == '\0' || -- endp[2] == 'B' || endp[2] == 'b')) -+ if (endp[1] == 'i' && endp[2] == 'B') - endp += 2; - case '\0': - break; - default: -- printk("UBI error: incorrect bytes count: \"%s\"\n", str); -+ printk(KERN_ERR "UBI error: incorrect bytes count: \"%s\"\n", -+ str); - return -EINVAL; - } - -@@ -793,23 +1152,27 @@ - struct mtd_dev_param *p; - char buf[MTD_PARAM_LEN_MAX]; - char *pbuf = &buf[0]; -- char *tokens[3] = {NULL, NULL, NULL}; -+ char *tokens[2] = {NULL, NULL}; -+ -+ if (!val) -+ return -EINVAL; - - if (mtd_devs == UBI_MAX_DEVICES) { -- printk("UBI error: too many parameters, max. is %d\n", -+ printk(KERN_ERR "UBI error: too many parameters, max. is %d\n", - UBI_MAX_DEVICES); - return -EINVAL; - } - - len = strnlen(val, MTD_PARAM_LEN_MAX); - if (len == MTD_PARAM_LEN_MAX) { -- printk("UBI error: parameter \"%s\" is too long, max. is %d\n", -- val, MTD_PARAM_LEN_MAX); -+ printk(KERN_ERR "UBI error: parameter \"%s\" is too long, " -+ "max. 
is %d\n", val, MTD_PARAM_LEN_MAX); - return -EINVAL; - } - - if (len == 0) { -- printk("UBI warning: empty 'mtd=' parameter - ignored\n"); -+ printk(KERN_WARNING "UBI warning: empty 'mtd=' parameter - " -+ "ignored\n"); - return 0; - } - -@@ -819,11 +1182,12 @@ - if (buf[len - 1] == '\n') - buf[len - 1] = '\0'; - -- for (i = 0; i < 3; i++) -+ for (i = 0; i < 2; i++) - tokens[i] = strsep(&pbuf, ","); - - if (pbuf) { -- printk("UBI error: too many arguments at \"%s\"\n", val); -+ printk(KERN_ERR "UBI error: too many arguments at \"%s\"\n", -+ val); - return -EINVAL; - } - -@@ -832,13 +1196,9 @@ - - if (tokens[1]) - p->vid_hdr_offs = bytes_str_to_int(tokens[1]); -- if (tokens[2]) -- p->data_offs = bytes_str_to_int(tokens[2]); - - if (p->vid_hdr_offs < 0) - return p->vid_hdr_offs; -- if (p->data_offs < 0) -- return p->data_offs; - - mtd_devs += 1; - return 0; -@@ -846,16 +1206,15 @@ - - module_param_call(mtd, ubi_mtd_param_parse, NULL, NULL, 000); - MODULE_PARM_DESC(mtd, "MTD devices to attach. Parameter format: " -- "mtd=<name|num>[,<vid_hdr_offs>,<data_offs>]. " -+ "mtd=<name|num>[,<vid_hdr_offs>].\n" - "Multiple \"mtd\" parameters may be specified.\n" -- "MTD devices may be specified by their number or name. 
" -- "Optional \"vid_hdr_offs\" and \"data_offs\" parameters " -- "specify UBI VID header position and data starting " -- "position to be used by UBI.\n" -- "Example: mtd=content,1984,2048 mtd=4 - attach MTD device" -- "with name content using VID header offset 1984 and data " -- "start 2048, and MTD device number 4 using default " -- "offsets"); -+ "MTD devices may be specified by their number or name.\n" -+ "Optional \"vid_hdr_offs\" parameter specifies UBI VID " -+ "header position and data starting position to be used " -+ "by UBI.\n" -+ "Example: mtd=content,1984 mtd=4 - attach MTD device" -+ "with name \"content\" using VID header offset 1984, and " -+ "MTD device number 4 with default VID header offset."); - - MODULE_VERSION(__stringify(UBI_VERSION)); - MODULE_DESCRIPTION("UBI - Unsorted Block Images"); -diff -Nurd linux-2.6.24.orig/drivers/mtd/ubi/cdev.c linux-2.6.24/drivers/mtd/ubi/cdev.c ---- linux-2.6.24.orig/drivers/mtd/ubi/cdev.c 2009-04-17 09:45:11.000000000 +0200 -+++ linux-2.6.24/drivers/mtd/ubi/cdev.c 2009-04-17 09:49:26.000000000 +0200 -@@ -28,45 +28,22 @@ - * - * Major and minor numbers are assigned dynamically to both UBI and volume - * character devices. -+ * -+ * Well, there is the third kind of character devices - the UBI control -+ * character device, which allows to manipulate by UBI devices - create and -+ * delete them. In other words, it is used for attaching and detaching MTD -+ * devices. - */ - - #include <linux/module.h> - #include <linux/stat.h> - #include <linux/ioctl.h> - #include <linux/capability.h> -+#include <linux/uaccess.h> -+#include <linux/compat.h> - #include <mtd/ubi-user.h> --#include <asm/uaccess.h> --#include <asm/div64.h> - #include "ubi.h" - --/* -- * Maximum sequence numbers of UBI and volume character device IOCTLs (direct -- * logical eraseblock erase is a debug-only feature). 
-- */ --#define UBI_CDEV_IOC_MAX_SEQ 2 --#ifndef CONFIG_MTD_UBI_DEBUG_USERSPACE_IO --#define VOL_CDEV_IOC_MAX_SEQ 1 --#else --#define VOL_CDEV_IOC_MAX_SEQ 2 --#endif -- --/** -- * major_to_device - get UBI device object by character device major number. -- * @major: major number -- * -- * This function returns a pointer to the UBI device object. -- */ --static struct ubi_device *major_to_device(int major) --{ -- int i; -- -- for (i = 0; i < ubi_devices_cnt; i++) -- if (ubi_devices[i] && ubi_devices[i]->major == major) -- return ubi_devices[i]; -- BUG(); -- return NULL; --} -- - /** - * get_exclusive - get exclusive access to an UBI volume. - * @desc: volume descriptor -@@ -124,18 +101,20 @@ - static int vol_cdev_open(struct inode *inode, struct file *file) - { - struct ubi_volume_desc *desc; -- const struct ubi_device *ubi = major_to_device(imajor(inode)); -- int vol_id = iminor(inode) - 1; -- int mode; -+ int vol_id = iminor(inode) - 1, mode, ubi_num; -+ -+ ubi_num = ubi_major2num(imajor(inode)); -+ if (ubi_num < 0) -+ return ubi_num; - - if (file->f_mode & FMODE_WRITE) - mode = UBI_READWRITE; - else - mode = UBI_READONLY; - -- dbg_msg("open volume %d, mode %d", vol_id, mode); -+ dbg_gen("open volume %d, mode %d", vol_id, mode); - -- desc = ubi_open_volume(ubi->ubi_num, vol_id, mode); -+ desc = ubi_open_volume(ubi_num, vol_id, mode); - if (IS_ERR(desc)) - return PTR_ERR(desc); - -@@ -148,13 +127,20 @@ - struct ubi_volume_desc *desc = file->private_data; - struct ubi_volume *vol = desc->vol; - -- dbg_msg("release volume %d, mode %d", vol->vol_id, desc->mode); -+ dbg_gen("release volume %d, mode %d", vol->vol_id, desc->mode); - - if (vol->updating) { - ubi_warn("update of volume %d not finished, volume is damaged", - vol->vol_id); -+ ubi_assert(!vol->changing_leb); - vol->updating = 0; - vfree(vol->upd_buf); -+ } else if (vol->changing_leb) { -+ dbg_gen("only %lld of %lld bytes received for atomic LEB change" -+ " for volume %d:%d, cancel", vol->upd_received, -+ 
vol->upd_bytes, vol->ubi->ubi_num, vol->vol_id); -+ vol->changing_leb = 0; -+ vfree(vol->upd_buf); - } - - ubi_close_volume(desc); -@@ -192,7 +178,7 @@ - return -EINVAL; - } - -- dbg_msg("seek volume %d, offset %lld, origin %d, new offset %lld", -+ dbg_gen("seek volume %d, offset %lld, origin %d, new offset %lld", - vol->vol_id, offset, origin, new_offset); - - file->f_pos = new_offset; -@@ -205,13 +191,12 @@ - struct ubi_volume_desc *desc = file->private_data; - struct ubi_volume *vol = desc->vol; - struct ubi_device *ubi = vol->ubi; -- int err, lnum, off, len, vol_id = desc->vol->vol_id, tbuf_size; -+ int err, lnum, off, len, tbuf_size; - size_t count_save = count; - void *tbuf; -- uint64_t tmp; - -- dbg_msg("read %zd bytes from offset %lld of volume %d", -- count, *offp, vol_id); -+ dbg_gen("read %zd bytes from offset %lld of volume %d", -+ count, *offp, vol->vol_id); - - if (vol->updating) { - dbg_err("updating"); -@@ -225,7 +210,7 @@ - return 0; - - if (vol->corrupted) -- dbg_msg("read from corrupted volume %d", vol_id); -+ dbg_gen("read from corrupted volume %d", vol->vol_id); - - if (*offp + count > vol->used_bytes) - count_save = count = vol->used_bytes - *offp; -@@ -238,10 +223,7 @@ - return -ENOMEM; - - len = count > tbuf_size ? 
tbuf_size : count; -- -- tmp = *offp; -- off = do_div(tmp, vol->usable_leb_size); -- lnum = tmp; -+ lnum = div_u64_rem(*offp, vol->usable_leb_size, &off); - - do { - cond_resched(); -@@ -249,7 +231,7 @@ - if (off + len >= vol->usable_leb_size) - len = vol->usable_leb_size - off; - -- err = ubi_eba_read_leb(ubi, vol_id, lnum, tbuf, off, len, 0); -+ err = ubi_eba_read_leb(ubi, vol, lnum, tbuf, off, len, 0); - if (err) - break; - -@@ -289,22 +271,18 @@ - struct ubi_volume_desc *desc = file->private_data; - struct ubi_volume *vol = desc->vol; - struct ubi_device *ubi = vol->ubi; -- int lnum, off, len, tbuf_size, vol_id = vol->vol_id, err = 0; -+ int lnum, off, len, tbuf_size, err = 0; - size_t count_save = count; - char *tbuf; -- uint64_t tmp; - -- dbg_msg("requested: write %zd bytes to offset %lld of volume %u", -- count, *offp, desc->vol->vol_id); -+ dbg_gen("requested: write %zd bytes to offset %lld of volume %u", -+ count, *offp, vol->vol_id); - - if (vol->vol_type == UBI_STATIC_VOLUME) - return -EROFS; - -- tmp = *offp; -- off = do_div(tmp, vol->usable_leb_size); -- lnum = tmp; -- -- if (off % ubi->min_io_size) { -+ lnum = div_u64_rem(*offp, vol->usable_leb_size, &off); -+ if (off & (ubi->min_io_size - 1)) { - dbg_err("unaligned position"); - return -EINVAL; - } -@@ -313,7 +291,7 @@ - count_save = count = vol->used_bytes - *offp; - - /* We can write only in fractions of the minimum I/O unit */ -- if (count % ubi->min_io_size) { -+ if (count & (ubi->min_io_size - 1)) { - dbg_err("unaligned write length"); - return -EINVAL; - } -@@ -339,7 +317,7 @@ - break; - } - -- err = ubi_eba_write_leb(ubi, vol_id, lnum, tbuf, off, len, -+ err = ubi_eba_write_leb(ubi, vol, lnum, tbuf, off, len, - UBI_UNKNOWN); - if (err) - break; -@@ -361,7 +339,7 @@ - } - - #else --#define vol_cdev_direct_write(file, buf, count, offp) -EPERM -+#define vol_cdev_direct_write(file, buf, count, offp) (-EPERM) - #endif /* CONFIG_MTD_UBI_DEBUG_USERSPACE_IO */ - - static ssize_t vol_cdev_write(struct 
file *file, const char __user *buf, -@@ -372,22 +350,32 @@ - struct ubi_volume *vol = desc->vol; - struct ubi_device *ubi = vol->ubi; - -- if (!vol->updating) -+ if (!vol->updating && !vol->changing_leb) - return vol_cdev_direct_write(file, buf, count, offp); - -- err = ubi_more_update_data(ubi, vol->vol_id, buf, count); -+ if (vol->updating) -+ err = ubi_more_update_data(ubi, vol, buf, count); -+ else -+ err = ubi_more_leb_change_data(ubi, vol, buf, count); -+ - if (err < 0) { -- ubi_err("cannot write %zd bytes of update data", count); -+ ubi_err("cannot accept more %zd bytes of data, error %d", -+ count, err); - return err; - } - - if (err) { - /* -- * Update is finished, @err contains number of actually written -- * bytes now. -+ * The operation is finished, @err contains number of actually -+ * written bytes. - */ - count = err; - -+ if (vol->changing_leb) { -+ revoke_exclusive(desc, UBI_READWRITE); -+ return count; -+ } -+ - err = ubi_check_volume(ubi, vol->vol_id); - if (err < 0) - return err; -@@ -402,12 +390,11 @@ - revoke_exclusive(desc, UBI_READWRITE); - } - -- *offp += count; - return count; - } - --static int vol_cdev_ioctl(struct inode *inode, struct file *file, -- unsigned int cmd, unsigned long arg) -+static long vol_cdev_ioctl(struct file *file, unsigned int cmd, -+ unsigned long arg) - { - int err = 0; - struct ubi_volume_desc *desc = file->private_data; -@@ -437,7 +424,8 @@ - break; - } - -- rsvd_bytes = vol->reserved_pebs * (ubi->leb_size-vol->data_pad); -+ rsvd_bytes = (long long)vol->reserved_pebs * -+ ubi->leb_size-vol->data_pad; - if (bytes < 0 || bytes > rsvd_bytes) { - err = -EINVAL; - break; -@@ -447,15 +435,49 @@ - if (err < 0) - break; - -- err = ubi_start_update(ubi, vol->vol_id, bytes); -+ err = ubi_start_update(ubi, vol, bytes); - if (bytes == 0) - revoke_exclusive(desc, UBI_READWRITE); -+ break; -+ } - -- file->f_pos = 0; -+ /* Atomic logical eraseblock change command */ -+ case UBI_IOCEBCH: -+ { -+ struct ubi_leb_change_req req; -+ 
-+ err = copy_from_user(&req, argp, -+ sizeof(struct ubi_leb_change_req)); -+ if (err) { -+ err = -EFAULT; -+ break; -+ } -+ -+ if (desc->mode == UBI_READONLY || -+ vol->vol_type == UBI_STATIC_VOLUME) { -+ err = -EROFS; -+ break; -+ } -+ -+ /* Validate the request */ -+ err = -EINVAL; -+ if (req.lnum < 0 || req.lnum >= vol->reserved_pebs || -+ req.bytes < 0 || req.lnum >= vol->usable_leb_size) -+ break; -+ if (req.dtype != UBI_LONGTERM && req.dtype != UBI_SHORTTERM && -+ req.dtype != UBI_UNKNOWN) -+ break; -+ -+ err = get_exclusive(desc); -+ if (err < 0) -+ break; -+ -+ err = ubi_start_leb_change(ubi, vol, &req); -+ if (req.bytes == 0) -+ revoke_exclusive(desc, UBI_READWRITE); - break; - } - --#ifdef CONFIG_MTD_UBI_DEBUG_USERSPACE_IO - /* Logical eraseblock erasure command */ - case UBI_IOCEBER: - { -@@ -467,7 +489,8 @@ - break; - } - -- if (desc->mode == UBI_READONLY) { -+ if (desc->mode == UBI_READONLY || -+ vol->vol_type == UBI_STATIC_VOLUME) { - err = -EROFS; - break; - } -@@ -477,26 +500,61 @@ - break; - } - -- if (vol->vol_type != UBI_DYNAMIC_VOLUME) { -- err = -EROFS; -+ dbg_gen("erase LEB %d:%d", vol->vol_id, lnum); -+ err = ubi_eba_unmap_leb(ubi, vol, lnum); -+ if (err) -+ break; -+ -+ err = ubi_wl_flush(ubi); -+ break; -+ } -+ -+ /* Logical eraseblock map command */ -+ case UBI_IOCEBMAP: -+ { -+ struct ubi_map_req req; -+ -+ err = copy_from_user(&req, argp, sizeof(struct ubi_map_req)); -+ if (err) { -+ err = -EFAULT; - break; - } -+ err = ubi_leb_map(desc, req.lnum, req.dtype); -+ break; -+ } - -- dbg_msg("erase LEB %d:%d", vol->vol_id, lnum); -- err = ubi_eba_unmap_leb(ubi, vol->vol_id, lnum); -- if (err) -+ /* Logical eraseblock un-map command */ -+ case UBI_IOCEBUNMAP: -+ { -+ int32_t lnum; -+ -+ err = get_user(lnum, (__user int32_t *)argp); -+ if (err) { -+ err = -EFAULT; - break; -+ } -+ err = ubi_leb_unmap(desc, lnum); -+ break; -+ } - -- err = ubi_wl_flush(ubi); -+ /* Check if logical eraseblock is mapped command */ -+ case UBI_IOCEBISMAP: -+ { -+ 
int32_t lnum; -+ -+ err = get_user(lnum, (__user int32_t *)argp); -+ if (err) { -+ err = -EFAULT; -+ break; -+ } -+ err = ubi_is_mapped(desc, lnum); - break; - } --#endif - - default: - err = -ENOTTY; - break; - } -- - return err; - } - -@@ -533,7 +591,7 @@ - if (req->alignment > ubi->leb_size) - goto bad; - -- n = req->alignment % ubi->min_io_size; -+ n = req->alignment & (ubi->min_io_size - 1); - if (req->alignment != 1 && n) - goto bad; - -@@ -542,6 +600,10 @@ - goto bad; - } - -+ n = strnlen(req->name, req->name_len + 1); -+ if (n != req->name_len) -+ goto bad; -+ - return 0; - - bad: -@@ -569,8 +631,169 @@ - return 0; - } - --static int ubi_cdev_ioctl(struct inode *inode, struct file *file, -- unsigned int cmd, unsigned long arg) -+/** -+ * rename_volumes - rename UBI volumes. -+ * @ubi: UBI device description object -+ * @req: volumes re-name request -+ * -+ * This is a helper function for the volume re-name IOCTL which validates the -+ * the request, opens the volume and calls corresponding volumes management -+ * function. Returns zero in case of success and a negative error code in case -+ * of failure. 
-+ */ -+static int rename_volumes(struct ubi_device *ubi, -+ struct ubi_rnvol_req *req) -+{ -+ int i, n, err; -+ struct list_head rename_list; -+ struct ubi_rename_entry *re, *re1; -+ -+ if (req->count < 0 || req->count > UBI_MAX_RNVOL) -+ return -EINVAL; -+ -+ if (req->count == 0) -+ return 0; -+ -+ /* Validate volume IDs and names in the request */ -+ for (i = 0; i < req->count; i++) { -+ if (req->ents[i].vol_id < 0 || -+ req->ents[i].vol_id >= ubi->vtbl_slots) -+ return -EINVAL; -+ if (req->ents[i].name_len < 0) -+ return -EINVAL; -+ if (req->ents[i].name_len > UBI_VOL_NAME_MAX) -+ return -ENAMETOOLONG; -+ req->ents[i].name[req->ents[i].name_len] = '\0'; -+ n = strlen(req->ents[i].name); -+ if (n != req->ents[i].name_len) -+ err = -EINVAL; -+ } -+ -+ /* Make sure volume IDs and names are unique */ -+ for (i = 0; i < req->count - 1; i++) { -+ for (n = i + 1; n < req->count; n++) { -+ if (req->ents[i].vol_id == req->ents[n].vol_id) { -+ dbg_err("duplicated volume id %d", -+ req->ents[i].vol_id); -+ return -EINVAL; -+ } -+ if (!strcmp(req->ents[i].name, req->ents[n].name)) { -+ dbg_err("duplicated volume name \"%s\"", -+ req->ents[i].name); -+ return -EINVAL; -+ } -+ } -+ } -+ -+ /* Create the re-name list */ -+ INIT_LIST_HEAD(&rename_list); -+ for (i = 0; i < req->count; i++) { -+ int vol_id = req->ents[i].vol_id; -+ int name_len = req->ents[i].name_len; -+ const char *name = req->ents[i].name; -+ -+ re = kzalloc(sizeof(struct ubi_rename_entry), GFP_KERNEL); -+ if (!re) { -+ err = -ENOMEM; -+ goto out_free; -+ } -+ -+ re->desc = ubi_open_volume(ubi->ubi_num, vol_id, UBI_EXCLUSIVE); -+ if (IS_ERR(re->desc)) { -+ err = PTR_ERR(re->desc); -+ dbg_err("cannot open volume %d, error %d", vol_id, err); -+ kfree(re); -+ goto out_free; -+ } -+ -+ /* Skip this re-naming if the name does not really change */ -+ if (re->desc->vol->name_len == name_len && -+ !memcmp(re->desc->vol->name, name, name_len)) { -+ ubi_close_volume(re->desc); -+ kfree(re); -+ continue; -+ } -+ -+ 
re->new_name_len = name_len; -+ memcpy(re->new_name, name, name_len); -+ list_add_tail(&re->list, &rename_list); -+ dbg_msg("will rename volume %d from \"%s\" to \"%s\"", -+ vol_id, re->desc->vol->name, name); -+ } -+ -+ if (list_empty(&rename_list)) -+ return 0; -+ -+ /* Find out the volumes which have to be removed */ -+ list_for_each_entry(re, &rename_list, list) { -+ struct ubi_volume_desc *desc; -+ int no_remove_needed = 0; -+ -+ /* -+ * Volume @re->vol_id is going to be re-named to -+ * @re->new_name, while its current name is @name. If a volume -+ * with name @re->new_name currently exists, it has to be -+ * removed, unless it is also re-named in the request (@req). -+ */ -+ list_for_each_entry(re1, &rename_list, list) { -+ if (re->new_name_len == re1->desc->vol->name_len && -+ !memcmp(re->new_name, re1->desc->vol->name, -+ re1->desc->vol->name_len)) { -+ no_remove_needed = 1; -+ break; -+ } -+ } -+ -+ if (no_remove_needed) -+ continue; -+ -+ /* -+ * It seems we need to remove volume with name @re->new_name, -+ * if it exists. 
-+ */ -+ desc = ubi_open_volume_nm(ubi->ubi_num, re->new_name, -+ UBI_EXCLUSIVE); -+ if (IS_ERR(desc)) { -+ err = PTR_ERR(desc); -+ if (err == -ENODEV) -+ /* Re-naming into a non-existing volume name */ -+ continue; -+ -+ /* The volume exists but busy, or an error occurred */ -+ dbg_err("cannot open volume \"%s\", error %d", -+ re->new_name, err); -+ goto out_free; -+ } -+ -+ re = kzalloc(sizeof(struct ubi_rename_entry), GFP_KERNEL); -+ if (!re) { -+ err = -ENOMEM; -+ ubi_close_volume(desc); -+ goto out_free; -+ } -+ -+ re->remove = 1; -+ re->desc = desc; -+ list_add(&re->list, &rename_list); -+ dbg_msg("will remove volume %d, name \"%s\"", -+ re->desc->vol->vol_id, re->desc->vol->name); -+ } -+ -+ mutex_lock(&ubi->volumes_mutex); -+ err = ubi_rename_volumes(ubi, &rename_list); -+ mutex_unlock(&ubi->volumes_mutex); -+ -+out_free: -+ list_for_each_entry_safe(re, re1, &rename_list, list) { -+ ubi_close_volume(re->desc); -+ list_del(&re->list); -+ kfree(re); -+ } -+ return err; -+} -+ -+static long ubi_cdev_ioctl(struct file *file, unsigned int cmd, -+ unsigned long arg) - { - int err = 0; - struct ubi_device *ubi; -@@ -580,9 +803,9 @@ - if (!capable(CAP_SYS_RESOURCE)) - return -EPERM; - -- ubi = major_to_device(imajor(inode)); -- if (IS_ERR(ubi)) -- return PTR_ERR(ubi); -+ ubi = ubi_get_by_major(imajor(file->f_mapping->host)); -+ if (!ubi) -+ return -ENODEV; - - switch (cmd) { - /* Create volume command */ -@@ -590,21 +813,21 @@ - { - struct ubi_mkvol_req req; - -- dbg_msg("create volume"); -- err = copy_from_user(&req, argp, -- sizeof(struct ubi_mkvol_req)); -+ dbg_gen("create volume"); -+ err = copy_from_user(&req, argp, sizeof(struct ubi_mkvol_req)); - if (err) { - err = -EFAULT; - break; - } - -+ req.name[req.name_len] = '\0'; - err = verify_mkvol_req(ubi, &req); - if (err) - break; - -- req.name[req.name_len] = '\0'; -- -+ mutex_lock(&ubi->volumes_mutex); - err = ubi_create_volume(ubi, &req); -+ mutex_unlock(&ubi->volumes_mutex); - if (err) - break; - -@@ -620,7 
+843,7 @@ - { - int vol_id; - -- dbg_msg("remove volume"); -+ dbg_gen("remove volume"); - err = get_user(vol_id, (__user int32_t *)argp); - if (err) { - err = -EFAULT; -@@ -633,10 +856,16 @@ - break; - } - -- err = ubi_remove_volume(desc); -- if (err) -- ubi_close_volume(desc); -+ mutex_lock(&ubi->volumes_mutex); -+ err = ubi_remove_volume(desc, 0); -+ mutex_unlock(&ubi->volumes_mutex); - -+ /* -+ * The volume is deleted (unless an error occurred), and the -+ * 'struct ubi_volume' object will be freed when -+ * 'ubi_close_volume()' will call 'put_device()'. -+ */ -+ ubi_close_volume(desc); - break; - } - -@@ -644,12 +873,10 @@ - case UBI_IOCRSVOL: - { - int pebs; -- uint64_t tmp; - struct ubi_rsvol_req req; - -- dbg_msg("re-size volume"); -- err = copy_from_user(&req, argp, -- sizeof(struct ubi_rsvol_req)); -+ dbg_gen("re-size volume"); -+ err = copy_from_user(&req, argp, sizeof(struct ubi_rsvol_req)); - if (err) { - err = -EFAULT; - break; -@@ -665,15 +892,120 @@ - break; - } - -- tmp = req.bytes; -- pebs = !!do_div(tmp, desc->vol->usable_leb_size); -- pebs += tmp; -+ pebs = div_u64(req.bytes + desc->vol->usable_leb_size - 1, -+ desc->vol->usable_leb_size); - -+ mutex_lock(&ubi->volumes_mutex); - err = ubi_resize_volume(desc, pebs); -+ mutex_unlock(&ubi->volumes_mutex); - ubi_close_volume(desc); - break; - } - -+ /* Re-name volumes command */ -+ case UBI_IOCRNVOL: -+ { -+ struct ubi_rnvol_req *req; -+ -+ dbg_msg("re-name volumes"); -+ req = kmalloc(sizeof(struct ubi_rnvol_req), GFP_KERNEL); -+ if (!req) { -+ err = -ENOMEM; -+ break; -+ }; -+ -+ err = copy_from_user(req, argp, sizeof(struct ubi_rnvol_req)); -+ if (err) { -+ err = -EFAULT; -+ kfree(req); -+ break; -+ } -+ -+ mutex_lock(&ubi->mult_mutex); -+ err = rename_volumes(ubi, req); -+ mutex_unlock(&ubi->mult_mutex); -+ kfree(req); -+ break; -+ } -+ -+ default: -+ err = -ENOTTY; -+ break; -+ } -+ -+ ubi_put_device(ubi); -+ return err; -+} -+ -+static long ctrl_cdev_ioctl(struct file *file, unsigned int cmd, -+ 
unsigned long arg) -+{ -+ int err = 0; -+ void __user *argp = (void __user *)arg; -+ -+ if (!capable(CAP_SYS_RESOURCE)) -+ return -EPERM; -+ -+ switch (cmd) { -+ /* Attach an MTD device command */ -+ case UBI_IOCATT: -+ { -+ struct ubi_attach_req req; -+ struct mtd_info *mtd; -+ -+ dbg_gen("attach MTD device"); -+ err = copy_from_user(&req, argp, sizeof(struct ubi_attach_req)); -+ if (err) { -+ err = -EFAULT; -+ break; -+ } -+ -+ if (req.mtd_num < 0 || -+ (req.ubi_num < 0 && req.ubi_num != UBI_DEV_NUM_AUTO)) { -+ err = -EINVAL; -+ break; -+ } -+ -+ mtd = get_mtd_device(NULL, req.mtd_num); -+ if (IS_ERR(mtd)) { -+ err = PTR_ERR(mtd); -+ break; -+ } -+ -+ /* -+ * Note, further request verification is done by -+ * 'ubi_attach_mtd_dev()'. -+ */ -+ mutex_lock(&ubi_devices_mutex); -+ err = ubi_attach_mtd_dev(mtd, req.ubi_num, req.vid_hdr_offset); -+ mutex_unlock(&ubi_devices_mutex); -+ if (err < 0) -+ put_mtd_device(mtd); -+ else -+ /* @err contains UBI device number */ -+ err = put_user(err, (__user int32_t *)argp); -+ -+ break; -+ } -+ -+ /* Detach an MTD device command */ -+ case UBI_IOCDET: -+ { -+ int ubi_num; -+ -+ dbg_gen("dettach MTD device"); -+ err = get_user(ubi_num, (__user int32_t *)argp); -+ if (err) { -+ err = -EFAULT; -+ break; -+ } -+ -+ mutex_lock(&ubi_devices_mutex); -+ err = ubi_detach_mtd_dev(ubi_num, 0); -+ mutex_unlock(&ubi_devices_mutex); -+ break; -+ } -+ - default: - err = -ENOTTY; - break; -@@ -682,20 +1014,59 @@ - return err; - } - -+#ifdef CONFIG_COMPAT -+static long vol_cdev_compat_ioctl(struct file *file, unsigned int cmd, -+ unsigned long arg) -+{ -+ unsigned long translated_arg = (unsigned long)compat_ptr(arg); -+ -+ return vol_cdev_ioctl(file, cmd, translated_arg); -+} -+ -+static long ubi_cdev_compat_ioctl(struct file *file, unsigned int cmd, -+ unsigned long arg) -+{ -+ unsigned long translated_arg = (unsigned long)compat_ptr(arg); -+ -+ return ubi_cdev_ioctl(file, cmd, translated_arg); -+} -+ -+static long 
ctrl_cdev_compat_ioctl(struct file *file, unsigned int cmd, -+ unsigned long arg) -+{ -+ unsigned long translated_arg = (unsigned long)compat_ptr(arg); -+ -+ return ctrl_cdev_ioctl(file, cmd, translated_arg); -+} -+#else -+#define vol_cdev_compat_ioctl NULL -+#define ubi_cdev_compat_ioctl NULL -+#define ctrl_cdev_compat_ioctl NULL -+#endif -+ -+/* UBI volume character device operations */ -+const struct file_operations ubi_vol_cdev_operations = { -+ .owner = THIS_MODULE, -+ .open = vol_cdev_open, -+ .release = vol_cdev_release, -+ .llseek = vol_cdev_llseek, -+ .read = vol_cdev_read, -+ .write = vol_cdev_write, -+ .unlocked_ioctl = vol_cdev_ioctl, -+ .compat_ioctl = vol_cdev_compat_ioctl, -+}; -+ - /* UBI character device operations */ --struct file_operations ubi_cdev_operations = { -- .owner = THIS_MODULE, -- .ioctl = ubi_cdev_ioctl, -- .llseek = no_llseek, -+const struct file_operations ubi_cdev_operations = { -+ .owner = THIS_MODULE, -+ .llseek = no_llseek, -+ .unlocked_ioctl = ubi_cdev_ioctl, -+ .compat_ioctl = ubi_cdev_compat_ioctl, - }; - --/* UBI volume character device operations */ --struct file_operations ubi_vol_cdev_operations = { -- .owner = THIS_MODULE, -- .open = vol_cdev_open, -- .release = vol_cdev_release, -- .llseek = vol_cdev_llseek, -- .read = vol_cdev_read, -- .write = vol_cdev_write, -- .ioctl = vol_cdev_ioctl, -+/* UBI control character device operations */ -+const struct file_operations ubi_ctrl_cdev_operations = { -+ .owner = THIS_MODULE, -+ .unlocked_ioctl = ctrl_cdev_ioctl, -+ .compat_ioctl = ctrl_cdev_compat_ioctl, - }; -diff -Nurd linux-2.6.24.orig/drivers/mtd/ubi/debug.c linux-2.6.24/drivers/mtd/ubi/debug.c ---- linux-2.6.24.orig/drivers/mtd/ubi/debug.c 2009-04-17 09:45:11.000000000 +0200 -+++ linux-2.6.24/drivers/mtd/ubi/debug.c 2009-04-17 09:49:26.000000000 +0200 -@@ -24,7 +24,7 @@ - * changes. 
- */ - --#ifdef CONFIG_MTD_UBI_DEBUG_MSG -+#ifdef CONFIG_MTD_UBI_DEBUG - - #include "ubi.h" - -@@ -34,14 +34,19 @@ - */ - void ubi_dbg_dump_ec_hdr(const struct ubi_ec_hdr *ec_hdr) - { -- dbg_msg("erase counter header dump:"); -- dbg_msg("magic %#08x", be32_to_cpu(ec_hdr->magic)); -- dbg_msg("version %d", (int)ec_hdr->version); -- dbg_msg("ec %llu", (long long)be64_to_cpu(ec_hdr->ec)); -- dbg_msg("vid_hdr_offset %d", be32_to_cpu(ec_hdr->vid_hdr_offset)); -- dbg_msg("data_offset %d", be32_to_cpu(ec_hdr->data_offset)); -- dbg_msg("hdr_crc %#08x", be32_to_cpu(ec_hdr->hdr_crc)); -- dbg_msg("erase counter header hexdump:"); -+ printk(KERN_DEBUG "Erase counter header dump:\n"); -+ printk(KERN_DEBUG "\tmagic %#08x\n", -+ be32_to_cpu(ec_hdr->magic)); -+ printk(KERN_DEBUG "\tversion %d\n", (int)ec_hdr->version); -+ printk(KERN_DEBUG "\tec %llu\n", -+ (long long)be64_to_cpu(ec_hdr->ec)); -+ printk(KERN_DEBUG "\tvid_hdr_offset %d\n", -+ be32_to_cpu(ec_hdr->vid_hdr_offset)); -+ printk(KERN_DEBUG "\tdata_offset %d\n", -+ be32_to_cpu(ec_hdr->data_offset)); -+ printk(KERN_DEBUG "\thdr_crc %#08x\n", -+ be32_to_cpu(ec_hdr->hdr_crc)); -+ printk(KERN_DEBUG "erase counter header hexdump:\n"); - print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, - ec_hdr, UBI_EC_HDR_SIZE, 1); - } -@@ -52,22 +57,23 @@ - */ - void ubi_dbg_dump_vid_hdr(const struct ubi_vid_hdr *vid_hdr) - { -- dbg_msg("volume identifier header dump:"); -- dbg_msg("magic %08x", be32_to_cpu(vid_hdr->magic)); -- dbg_msg("version %d", (int)vid_hdr->version); -- dbg_msg("vol_type %d", (int)vid_hdr->vol_type); -- dbg_msg("copy_flag %d", (int)vid_hdr->copy_flag); -- dbg_msg("compat %d", (int)vid_hdr->compat); -- dbg_msg("vol_id %d", be32_to_cpu(vid_hdr->vol_id)); -- dbg_msg("lnum %d", be32_to_cpu(vid_hdr->lnum)); -- dbg_msg("leb_ver %u", be32_to_cpu(vid_hdr->leb_ver)); -- dbg_msg("data_size %d", be32_to_cpu(vid_hdr->data_size)); -- dbg_msg("used_ebs %d", be32_to_cpu(vid_hdr->used_ebs)); -- dbg_msg("data_pad %d", 
be32_to_cpu(vid_hdr->data_pad)); -- dbg_msg("sqnum %llu", -+ printk(KERN_DEBUG "Volume identifier header dump:\n"); -+ printk(KERN_DEBUG "\tmagic %08x\n", be32_to_cpu(vid_hdr->magic)); -+ printk(KERN_DEBUG "\tversion %d\n", (int)vid_hdr->version); -+ printk(KERN_DEBUG "\tvol_type %d\n", (int)vid_hdr->vol_type); -+ printk(KERN_DEBUG "\tcopy_flag %d\n", (int)vid_hdr->copy_flag); -+ printk(KERN_DEBUG "\tcompat %d\n", (int)vid_hdr->compat); -+ printk(KERN_DEBUG "\tvol_id %d\n", be32_to_cpu(vid_hdr->vol_id)); -+ printk(KERN_DEBUG "\tlnum %d\n", be32_to_cpu(vid_hdr->lnum)); -+ printk(KERN_DEBUG "\tdata_size %d\n", be32_to_cpu(vid_hdr->data_size)); -+ printk(KERN_DEBUG "\tused_ebs %d\n", be32_to_cpu(vid_hdr->used_ebs)); -+ printk(KERN_DEBUG "\tdata_pad %d\n", be32_to_cpu(vid_hdr->data_pad)); -+ printk(KERN_DEBUG "\tsqnum %llu\n", - (unsigned long long)be64_to_cpu(vid_hdr->sqnum)); -- dbg_msg("hdr_crc %08x", be32_to_cpu(vid_hdr->hdr_crc)); -- dbg_msg("volume identifier header hexdump:"); -+ printk(KERN_DEBUG "\thdr_crc %08x\n", be32_to_cpu(vid_hdr->hdr_crc)); -+ printk(KERN_DEBUG "Volume identifier header hexdump:\n"); -+ print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, -+ vid_hdr, UBI_VID_HDR_SIZE, 1); - } - - /** -@@ -76,27 +82,27 @@ - */ - void ubi_dbg_dump_vol_info(const struct ubi_volume *vol) - { -- dbg_msg("volume information dump:"); -- dbg_msg("vol_id %d", vol->vol_id); -- dbg_msg("reserved_pebs %d", vol->reserved_pebs); -- dbg_msg("alignment %d", vol->alignment); -- dbg_msg("data_pad %d", vol->data_pad); -- dbg_msg("vol_type %d", vol->vol_type); -- dbg_msg("name_len %d", vol->name_len); -- dbg_msg("usable_leb_size %d", vol->usable_leb_size); -- dbg_msg("used_ebs %d", vol->used_ebs); -- dbg_msg("used_bytes %lld", vol->used_bytes); -- dbg_msg("last_eb_bytes %d", vol->last_eb_bytes); -- dbg_msg("corrupted %d", vol->corrupted); -- dbg_msg("upd_marker %d", vol->upd_marker); -+ printk(KERN_DEBUG "Volume information dump:\n"); -+ printk(KERN_DEBUG "\tvol_id 
%d\n", vol->vol_id); -+ printk(KERN_DEBUG "\treserved_pebs %d\n", vol->reserved_pebs); -+ printk(KERN_DEBUG "\talignment %d\n", vol->alignment); -+ printk(KERN_DEBUG "\tdata_pad %d\n", vol->data_pad); -+ printk(KERN_DEBUG "\tvol_type %d\n", vol->vol_type); -+ printk(KERN_DEBUG "\tname_len %d\n", vol->name_len); -+ printk(KERN_DEBUG "\tusable_leb_size %d\n", vol->usable_leb_size); -+ printk(KERN_DEBUG "\tused_ebs %d\n", vol->used_ebs); -+ printk(KERN_DEBUG "\tused_bytes %lld\n", vol->used_bytes); -+ printk(KERN_DEBUG "\tlast_eb_bytes %d\n", vol->last_eb_bytes); -+ printk(KERN_DEBUG "\tcorrupted %d\n", vol->corrupted); -+ printk(KERN_DEBUG "\tupd_marker %d\n", vol->upd_marker); - - if (vol->name_len <= UBI_VOL_NAME_MAX && - strnlen(vol->name, vol->name_len + 1) == vol->name_len) { -- dbg_msg("name %s", vol->name); -+ printk(KERN_DEBUG "\tname %s\n", vol->name); - } else { -- dbg_msg("the 1st 5 characters of the name: %c%c%c%c%c", -- vol->name[0], vol->name[1], vol->name[2], -- vol->name[3], vol->name[4]); -+ printk(KERN_DEBUG "\t1st 5 characters of name: %c%c%c%c%c\n", -+ vol->name[0], vol->name[1], vol->name[2], -+ vol->name[3], vol->name[4]); - } - } - -@@ -109,28 +115,29 @@ - { - int name_len = be16_to_cpu(r->name_len); - -- dbg_msg("volume table record %d dump:", idx); -- dbg_msg("reserved_pebs %d", be32_to_cpu(r->reserved_pebs)); -- dbg_msg("alignment %d", be32_to_cpu(r->alignment)); -- dbg_msg("data_pad %d", be32_to_cpu(r->data_pad)); -- dbg_msg("vol_type %d", (int)r->vol_type); -- dbg_msg("upd_marker %d", (int)r->upd_marker); -- dbg_msg("name_len %d", name_len); -+ printk(KERN_DEBUG "Volume table record %d dump:\n", idx); -+ printk(KERN_DEBUG "\treserved_pebs %d\n", -+ be32_to_cpu(r->reserved_pebs)); -+ printk(KERN_DEBUG "\talignment %d\n", be32_to_cpu(r->alignment)); -+ printk(KERN_DEBUG "\tdata_pad %d\n", be32_to_cpu(r->data_pad)); -+ printk(KERN_DEBUG "\tvol_type %d\n", (int)r->vol_type); -+ printk(KERN_DEBUG "\tupd_marker %d\n", (int)r->upd_marker); -+ 
printk(KERN_DEBUG "\tname_len %d\n", name_len); - - if (r->name[0] == '\0') { -- dbg_msg("name NULL"); -+ printk(KERN_DEBUG "\tname NULL\n"); - return; - } - - if (name_len <= UBI_VOL_NAME_MAX && - strnlen(&r->name[0], name_len + 1) == name_len) { -- dbg_msg("name %s", &r->name[0]); -+ printk(KERN_DEBUG "\tname %s\n", &r->name[0]); - } else { -- dbg_msg("1st 5 characters of the name: %c%c%c%c%c", -+ printk(KERN_DEBUG "\t1st 5 characters of name: %c%c%c%c%c\n", - r->name[0], r->name[1], r->name[2], r->name[3], - r->name[4]); - } -- dbg_msg("crc %#08x", be32_to_cpu(r->crc)); -+ printk(KERN_DEBUG "\tcrc %#08x\n", be32_to_cpu(r->crc)); - } - - /** -@@ -139,15 +146,15 @@ - */ - void ubi_dbg_dump_sv(const struct ubi_scan_volume *sv) - { -- dbg_msg("volume scanning information dump:"); -- dbg_msg("vol_id %d", sv->vol_id); -- dbg_msg("highest_lnum %d", sv->highest_lnum); -- dbg_msg("leb_count %d", sv->leb_count); -- dbg_msg("compat %d", sv->compat); -- dbg_msg("vol_type %d", sv->vol_type); -- dbg_msg("used_ebs %d", sv->used_ebs); -- dbg_msg("last_data_size %d", sv->last_data_size); -- dbg_msg("data_pad %d", sv->data_pad); -+ printk(KERN_DEBUG "Volume scanning information dump:\n"); -+ printk(KERN_DEBUG "\tvol_id %d\n", sv->vol_id); -+ printk(KERN_DEBUG "\thighest_lnum %d\n", sv->highest_lnum); -+ printk(KERN_DEBUG "\tleb_count %d\n", sv->leb_count); -+ printk(KERN_DEBUG "\tcompat %d\n", sv->compat); -+ printk(KERN_DEBUG "\tvol_type %d\n", sv->vol_type); -+ printk(KERN_DEBUG "\tused_ebs %d\n", sv->used_ebs); -+ printk(KERN_DEBUG "\tlast_data_size %d\n", sv->last_data_size); -+ printk(KERN_DEBUG "\tdata_pad %d\n", sv->data_pad); - } - - /** -@@ -157,14 +164,13 @@ - */ - void ubi_dbg_dump_seb(const struct ubi_scan_leb *seb, int type) - { -- dbg_msg("eraseblock scanning information dump:"); -- dbg_msg("ec %d", seb->ec); -- dbg_msg("pnum %d", seb->pnum); -+ printk(KERN_DEBUG "eraseblock scanning information dump:\n"); -+ printk(KERN_DEBUG "\tec %d\n", seb->ec); -+ 
printk(KERN_DEBUG "\tpnum %d\n", seb->pnum); - if (type == 0) { -- dbg_msg("lnum %d", seb->lnum); -- dbg_msg("scrub %d", seb->scrub); -- dbg_msg("sqnum %llu", seb->sqnum); -- dbg_msg("leb_ver %u", seb->leb_ver); -+ printk(KERN_DEBUG "\tlnum %d\n", seb->lnum); -+ printk(KERN_DEBUG "\tscrub %d\n", seb->scrub); -+ printk(KERN_DEBUG "\tsqnum %llu\n", seb->sqnum); - } - } - -@@ -176,16 +182,16 @@ - { - char nm[17]; - -- dbg_msg("volume creation request dump:"); -- dbg_msg("vol_id %d", req->vol_id); -- dbg_msg("alignment %d", req->alignment); -- dbg_msg("bytes %lld", (long long)req->bytes); -- dbg_msg("vol_type %d", req->vol_type); -- dbg_msg("name_len %d", req->name_len); -+ printk(KERN_DEBUG "Volume creation request dump:\n"); -+ printk(KERN_DEBUG "\tvol_id %d\n", req->vol_id); -+ printk(KERN_DEBUG "\talignment %d\n", req->alignment); -+ printk(KERN_DEBUG "\tbytes %lld\n", (long long)req->bytes); -+ printk(KERN_DEBUG "\tvol_type %d\n", req->vol_type); -+ printk(KERN_DEBUG "\tname_len %d\n", req->name_len); - - memcpy(nm, req->name, 16); - nm[16] = 0; -- dbg_msg("the 1st 16 characters of the name: %s", nm); -+ printk(KERN_DEBUG "\t1st 16 characters of name: %s\n", nm); - } - --#endif /* CONFIG_MTD_UBI_DEBUG_MSG */ -+#endif /* CONFIG_MTD_UBI_DEBUG */ -diff -Nurd linux-2.6.24.orig/drivers/mtd/ubi/debug.h linux-2.6.24/drivers/mtd/ubi/debug.h ---- linux-2.6.24.orig/drivers/mtd/ubi/debug.h 2009-04-17 09:45:11.000000000 +0200 -+++ linux-2.6.24/drivers/mtd/ubi/debug.h 2009-04-17 09:49:26.000000000 +0200 -@@ -24,23 +24,19 @@ - #ifdef CONFIG_MTD_UBI_DEBUG - #include <linux/random.h> - --#define ubi_assert(expr) BUG_ON(!(expr)) - #define dbg_err(fmt, ...) ubi_err(fmt, ##__VA_ARGS__) --#else --#define ubi_assert(expr) ({}) --#define dbg_err(fmt, ...) 
({}) --#endif - --#ifdef CONFIG_MTD_UBI_DEBUG_DISABLE_BGT --#define DBG_DISABLE_BGT 1 --#else --#define DBG_DISABLE_BGT 0 --#endif -+#define ubi_assert(expr) do { \ -+ if (unlikely(!(expr))) { \ -+ printk(KERN_CRIT "UBI assert failed in %s at %u (pid %d)\n", \ -+ __func__, __LINE__, current->pid); \ -+ ubi_dbg_dump_stack(); \ -+ } \ -+} while (0) - --#ifdef CONFIG_MTD_UBI_DEBUG_MSG --/* Generic debugging message */ --#define dbg_msg(fmt, ...) \ -- printk(KERN_DEBUG "UBI DBG: %s: " fmt "\n", __FUNCTION__, ##__VA_ARGS__) -+#define dbg_msg(fmt, ...) \ -+ printk(KERN_DEBUG "UBI DBG (pid %d): %s: " fmt "\n", \ -+ current->pid, __FUNCTION__, ##__VA_ARGS__) - - #define ubi_dbg_dump_stack() dump_stack() - -@@ -60,54 +56,47 @@ - void ubi_dbg_dump_seb(const struct ubi_scan_leb *seb, int type); - void ubi_dbg_dump_mkvol_req(const struct ubi_mkvol_req *req); - -+#ifdef CONFIG_MTD_UBI_DEBUG_MSG -+/* General debugging messages */ -+#define dbg_gen(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) - #else -- --#define dbg_msg(fmt, ...) ({}) --#define ubi_dbg_dump_stack() ({}) --#define ubi_dbg_dump_ec_hdr(ec_hdr) ({}) --#define ubi_dbg_dump_vid_hdr(vid_hdr) ({}) --#define ubi_dbg_dump_vol_info(vol) ({}) --#define ubi_dbg_dump_vtbl_record(r, idx) ({}) --#define ubi_dbg_dump_sv(sv) ({}) --#define ubi_dbg_dump_seb(seb, type) ({}) --#define ubi_dbg_dump_mkvol_req(req) ({}) -- --#endif /* CONFIG_MTD_UBI_DEBUG_MSG */ -+#define dbg_gen(fmt, ...) ({}) -+#endif - - #ifdef CONFIG_MTD_UBI_DEBUG_MSG_EBA --/* Messages from the eraseblock association unit */ --#define dbg_eba(fmt, ...) \ -- printk(KERN_DEBUG "UBI DBG eba: %s: " fmt "\n", __FUNCTION__, \ -- ##__VA_ARGS__) -+/* Messages from the eraseblock association sub-system */ -+#define dbg_eba(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) - #else - #define dbg_eba(fmt, ...) ({}) - #endif - - #ifdef CONFIG_MTD_UBI_DEBUG_MSG_WL --/* Messages from the wear-leveling unit */ --#define dbg_wl(fmt, ...) 
\ -- printk(KERN_DEBUG "UBI DBG wl: %s: " fmt "\n", __FUNCTION__, \ -- ##__VA_ARGS__) -+/* Messages from the wear-leveling sub-system */ -+#define dbg_wl(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) - #else - #define dbg_wl(fmt, ...) ({}) - #endif - - #ifdef CONFIG_MTD_UBI_DEBUG_MSG_IO --/* Messages from the input/output unit */ --#define dbg_io(fmt, ...) \ -- printk(KERN_DEBUG "UBI DBG io: %s: " fmt "\n", __FUNCTION__, \ -- ##__VA_ARGS__) -+/* Messages from the input/output sub-system */ -+#define dbg_io(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) - #else - #define dbg_io(fmt, ...) ({}) - #endif - - #ifdef CONFIG_MTD_UBI_DEBUG_MSG_BLD - /* Initialization and build messages */ --#define dbg_bld(fmt, ...) \ -- printk(KERN_DEBUG "UBI DBG bld: %s: " fmt "\n", __FUNCTION__, \ -- ##__VA_ARGS__) -+#define dbg_bld(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -+#define UBI_IO_DEBUG 1 - #else - #define dbg_bld(fmt, ...) ({}) -+#define UBI_IO_DEBUG 0 -+#endif -+ -+#ifdef CONFIG_MTD_UBI_DEBUG_DISABLE_BGT -+#define DBG_DISABLE_BGT 1 -+#else -+#define DBG_DISABLE_BGT 0 - #endif - - #ifdef CONFIG_MTD_UBI_DEBUG_EMULATE_BITFLIPS -@@ -154,4 +143,50 @@ - #define ubi_dbg_is_erase_failure() 0 - #endif - -+#else -+ -+#define ubi_assert(expr) ({}) -+#define dbg_err(fmt, ...) ({}) -+#define dbg_msg(fmt, ...) ({}) -+#define dbg_gen(fmt, ...) ({}) -+#define dbg_eba(fmt, ...) ({}) -+#define dbg_wl(fmt, ...) ({}) -+#define dbg_io(fmt, ...) ({}) -+#define dbg_bld(fmt, ...) 
({}) -+#define ubi_dbg_dump_stack() ({}) -+#define ubi_dbg_dump_ec_hdr(ec_hdr) ({}) -+#define ubi_dbg_dump_vid_hdr(vid_hdr) ({}) -+#define ubi_dbg_dump_vol_info(vol) ({}) -+#define ubi_dbg_dump_vtbl_record(r, idx) ({}) -+#define ubi_dbg_dump_sv(sv) ({}) -+#define ubi_dbg_dump_seb(seb, type) ({}) -+#define ubi_dbg_dump_mkvol_req(req) ({}) -+ -+#define UBI_IO_DEBUG 0 -+#define DBG_DISABLE_BGT 0 -+#define ubi_dbg_is_bitflip() 0 -+#define ubi_dbg_is_write_failure() 0 -+#define ubi_dbg_is_erase_failure() 0 -+ -+#endif /* !CONFIG_MTD_UBI_DEBUG */ -+ -+/* -+ * Some compatibility stuff goes here. -+ */ -+ -+#include <asm/div64.h> -+ -+static inline uint64_t div_u64(uint64_t dividend, uint64_t divisor) -+{ -+ do_div(dividend, divisor); -+ return dividend; -+} -+ -+static inline uint64_t div_u64_rem(uint64_t dividend, uint32_t divisor, -+ uint32_t *remainder) -+{ -+ *remainder = do_div(dividend, divisor); -+ return dividend; -+} -+ - #endif /* !__UBI_DEBUG_H__ */ -diff -Nurd linux-2.6.24.orig/drivers/mtd/ubi/eba.c linux-2.6.24/drivers/mtd/ubi/eba.c ---- linux-2.6.24.orig/drivers/mtd/ubi/eba.c 2009-04-17 09:45:11.000000000 +0200 -+++ linux-2.6.24/drivers/mtd/ubi/eba.c 2009-04-17 09:49:26.000000000 +0200 -@@ -19,20 +19,20 @@ - */ - - /* -- * The UBI Eraseblock Association (EBA) unit. -+ * The UBI Eraseblock Association (EBA) sub-system. - * -- * This unit is responsible for I/O to/from logical eraseblock. -+ * This sub-system is responsible for I/O to/from logical eraseblock. - * - * Although in this implementation the EBA table is fully kept and managed in - * RAM, which assumes poor scalability, it might be (partially) maintained on - * flash in future implementations. - * -- * The EBA unit implements per-logical eraseblock locking. Before accessing a -- * logical eraseblock it is locked for reading or writing. The per-logical -- * eraseblock locking is implemented by means of the lock tree. 
The lock tree -- * is an RB-tree which refers all the currently locked logical eraseblocks. The -- * lock tree elements are &struct ltree_entry objects. They are indexed by -- * (@vol_id, @lnum) pairs. -+ * The EBA sub-system implements per-logical eraseblock locking. Before -+ * accessing a logical eraseblock it is locked for reading or writing. The -+ * per-logical eraseblock locking is implemented by means of the lock tree. The -+ * lock tree is an RB-tree which refers all the currently locked logical -+ * eraseblocks. The lock tree elements are &struct ubi_ltree_entry objects. -+ * They are indexed by (@vol_id, @lnum) pairs. - * - * EBA also maintains the global sequence counter which is incremented each - * time a logical eraseblock is mapped to a physical eraseblock and it is -@@ -50,29 +50,6 @@ - #define EBA_RESERVED_PEBS 1 - - /** -- * struct ltree_entry - an entry in the lock tree. -- * @rb: links RB-tree nodes -- * @vol_id: volume ID of the locked logical eraseblock -- * @lnum: locked logical eraseblock number -- * @users: how many tasks are using this logical eraseblock or wait for it -- * @mutex: read/write mutex to implement read/write access serialization to -- * the (@vol_id, @lnum) logical eraseblock -- * -- * When a logical eraseblock is being locked - corresponding &struct ltree_entry -- * object is inserted to the lock tree (@ubi->ltree). -- */ --struct ltree_entry { -- struct rb_node rb; -- int vol_id; -- int lnum; -- int users; -- struct rw_semaphore mutex; --}; -- --/* Slab cache for lock-tree entries */ --static struct kmem_cache *ltree_slab; -- --/** - * next_sqnum - get next sequence number. 
- * @ubi: UBI device description object - * -@@ -101,7 +78,7 @@ - */ - static int ubi_get_compat(const struct ubi_device *ubi, int vol_id) - { -- if (vol_id == UBI_LAYOUT_VOL_ID) -+ if (vol_id == UBI_LAYOUT_VOLUME_ID) - return UBI_LAYOUT_VOLUME_COMPAT; - return 0; - } -@@ -112,20 +89,20 @@ - * @vol_id: volume ID - * @lnum: logical eraseblock number - * -- * This function returns a pointer to the corresponding &struct ltree_entry -+ * This function returns a pointer to the corresponding &struct ubi_ltree_entry - * object if the logical eraseblock is locked and %NULL if it is not. - * @ubi->ltree_lock has to be locked. - */ --static struct ltree_entry *ltree_lookup(struct ubi_device *ubi, int vol_id, -- int lnum) -+static struct ubi_ltree_entry *ltree_lookup(struct ubi_device *ubi, int vol_id, -+ int lnum) - { - struct rb_node *p; - - p = ubi->ltree.rb_node; - while (p) { -- struct ltree_entry *le; -+ struct ubi_ltree_entry *le; - -- le = rb_entry(p, struct ltree_entry, rb); -+ le = rb_entry(p, struct ubi_ltree_entry, rb); - - if (vol_id < le->vol_id) - p = p->rb_left; -@@ -155,15 +132,17 @@ - * Returns pointer to the lock tree entry or %-ENOMEM if memory allocation - * failed. 
- */ --static struct ltree_entry *ltree_add_entry(struct ubi_device *ubi, int vol_id, -- int lnum) -+static struct ubi_ltree_entry *ltree_add_entry(struct ubi_device *ubi, -+ int vol_id, int lnum) - { -- struct ltree_entry *le, *le1, *le_free; -+ struct ubi_ltree_entry *le, *le1, *le_free; - -- le = kmem_cache_alloc(ltree_slab, GFP_NOFS); -+ le = kmalloc(sizeof(struct ubi_ltree_entry), GFP_NOFS); - if (!le) - return ERR_PTR(-ENOMEM); - -+ le->users = 0; -+ init_rwsem(&le->mutex); - le->vol_id = vol_id; - le->lnum = lnum; - -@@ -189,7 +168,7 @@ - p = &ubi->ltree.rb_node; - while (*p) { - parent = *p; -- le1 = rb_entry(parent, struct ltree_entry, rb); -+ le1 = rb_entry(parent, struct ubi_ltree_entry, rb); - - if (vol_id < le1->vol_id) - p = &(*p)->rb_left; -@@ -210,9 +189,7 @@ - le->users += 1; - spin_unlock(&ubi->ltree_lock); - -- if (le_free) -- kmem_cache_free(ltree_slab, le_free); -- -+ kfree(le_free); - return le; - } - -@@ -227,7 +204,7 @@ - */ - static int leb_read_lock(struct ubi_device *ubi, int vol_id, int lnum) - { -- struct ltree_entry *le; -+ struct ubi_ltree_entry *le; - - le = ltree_add_entry(ubi, vol_id, lnum); - if (IS_ERR(le)) -@@ -244,22 +221,18 @@ - */ - static void leb_read_unlock(struct ubi_device *ubi, int vol_id, int lnum) - { -- int free = 0; -- struct ltree_entry *le; -+ struct ubi_ltree_entry *le; - - spin_lock(&ubi->ltree_lock); - le = ltree_lookup(ubi, vol_id, lnum); - le->users -= 1; - ubi_assert(le->users >= 0); -+ up_read(&le->mutex); - if (le->users == 0) { - rb_erase(&le->rb, &ubi->ltree); -- free = 1; -+ kfree(le); - } - spin_unlock(&ubi->ltree_lock); -- -- up_read(&le->mutex); -- if (free) -- kmem_cache_free(ltree_slab, le); - } - - /** -@@ -273,7 +246,7 @@ - */ - static int leb_write_lock(struct ubi_device *ubi, int vol_id, int lnum) - { -- struct ltree_entry *le; -+ struct ubi_ltree_entry *le; - - le = ltree_add_entry(ubi, vol_id, lnum); - if (IS_ERR(le)) -@@ -283,6 +256,40 @@ - } - - /** -+ * leb_write_lock - lock logical 
eraseblock for writing. -+ * @ubi: UBI device description object -+ * @vol_id: volume ID -+ * @lnum: logical eraseblock number -+ * -+ * This function locks a logical eraseblock for writing if there is no -+ * contention and does nothing if there is contention. Returns %0 in case of -+ * success, %1 in case of contention, and and a negative error code in case of -+ * failure. -+ */ -+static int leb_write_trylock(struct ubi_device *ubi, int vol_id, int lnum) -+{ -+ struct ubi_ltree_entry *le; -+ -+ le = ltree_add_entry(ubi, vol_id, lnum); -+ if (IS_ERR(le)) -+ return PTR_ERR(le); -+ if (down_write_trylock(&le->mutex)) -+ return 0; -+ -+ /* Contention, cancel */ -+ spin_lock(&ubi->ltree_lock); -+ le->users -= 1; -+ ubi_assert(le->users >= 0); -+ if (le->users == 0) { -+ rb_erase(&le->rb, &ubi->ltree); -+ kfree(le); -+ } -+ spin_unlock(&ubi->ltree_lock); -+ -+ return 1; -+} -+ -+/** - * leb_write_unlock - unlock logical eraseblock. - * @ubi: UBI device description object - * @vol_id: volume ID -@@ -290,39 +297,34 @@ - */ - static void leb_write_unlock(struct ubi_device *ubi, int vol_id, int lnum) - { -- int free; -- struct ltree_entry *le; -+ struct ubi_ltree_entry *le; - - spin_lock(&ubi->ltree_lock); - le = ltree_lookup(ubi, vol_id, lnum); - le->users -= 1; - ubi_assert(le->users >= 0); -+ up_write(&le->mutex); - if (le->users == 0) { - rb_erase(&le->rb, &ubi->ltree); -- free = 1; -- } else -- free = 0; -+ kfree(le); -+ } - spin_unlock(&ubi->ltree_lock); -- -- up_write(&le->mutex); -- if (free) -- kmem_cache_free(ltree_slab, le); - } - - /** - * ubi_eba_unmap_leb - un-map logical eraseblock. - * @ubi: UBI device description object -- * @vol_id: volume ID -+ * @vol: volume description object - * @lnum: logical eraseblock number - * - * This function un-maps logical eraseblock @lnum and schedules corresponding - * physical eraseblock for erasure. Returns zero in case of success and a - * negative error code in case of failure. 
- */ --int ubi_eba_unmap_leb(struct ubi_device *ubi, int vol_id, int lnum) -+int ubi_eba_unmap_leb(struct ubi_device *ubi, struct ubi_volume *vol, -+ int lnum) - { -- int idx = vol_id2idx(ubi, vol_id), err, pnum; -- struct ubi_volume *vol = ubi->volumes[idx]; -+ int err, pnum, vol_id = vol->vol_id; - - if (ubi->ro_mode) - return -EROFS; -@@ -349,7 +351,7 @@ - /** - * ubi_eba_read_leb - read data. - * @ubi: UBI device description object -- * @vol_id: volume ID -+ * @vol: volume description object - * @lnum: logical eraseblock number - * @buf: buffer to store the read data - * @offset: offset from where to read -@@ -365,12 +367,11 @@ - * returned for any volume type if an ECC error was detected by the MTD device - * driver. Other negative error cored may be returned in case of other errors. - */ --int ubi_eba_read_leb(struct ubi_device *ubi, int vol_id, int lnum, void *buf, -- int offset, int len, int check) -+int ubi_eba_read_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum, -+ void *buf, int offset, int len, int check) - { -- int err, pnum, scrub = 0, idx = vol_id2idx(ubi, vol_id); -+ int err, pnum, scrub = 0, vol_id = vol->vol_id; - struct ubi_vid_hdr *vid_hdr; -- struct ubi_volume *vol = ubi->volumes[idx]; - uint32_t uninitialized_var(crc); - - err = leb_read_lock(ubi, vol_id, lnum); -@@ -500,16 +501,12 @@ - struct ubi_vid_hdr *vid_hdr; - - vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS); -- if (!vid_hdr) { -+ if (!vid_hdr) - return -ENOMEM; -- } -- -- mutex_lock(&ubi->buf_mutex); - - retry: - new_pnum = ubi_wl_get_peb(ubi, UBI_UNKNOWN); - if (new_pnum < 0) { -- mutex_unlock(&ubi->buf_mutex); - ubi_free_vid_hdr(ubi, vid_hdr); - return new_pnum; - } -@@ -529,20 +526,23 @@ - goto write_error; - - data_size = offset + len; -+ mutex_lock(&ubi->buf_mutex); - memset(ubi->peb_buf1 + offset, 0xFF, len); - - /* Read everything before the area where the write failure happened */ - if (offset > 0) { - err = ubi_io_read_data(ubi, ubi->peb_buf1, pnum, 0, offset); - if 
(err && err != UBI_IO_BITFLIPS) -- goto out_put; -+ goto out_unlock; - } - - memcpy(ubi->peb_buf1 + offset, buf, len); - - err = ubi_io_write_data(ubi, ubi->peb_buf1, new_pnum, 0, data_size); -- if (err) -+ if (err) { -+ mutex_unlock(&ubi->buf_mutex); - goto write_error; -+ } - - mutex_unlock(&ubi->buf_mutex); - ubi_free_vid_hdr(ubi, vid_hdr); -@@ -553,8 +553,9 @@ - ubi_msg("data was successfully recovered"); - return 0; - --out_put: -+out_unlock: - mutex_unlock(&ubi->buf_mutex); -+out_put: - ubi_wl_put_peb(ubi, new_pnum, 1); - ubi_free_vid_hdr(ubi, vid_hdr); - return err; -@@ -567,7 +568,6 @@ - ubi_warn("failed to write to PEB %d", new_pnum); - ubi_wl_put_peb(ubi, new_pnum, 1); - if (++tries > UBI_IO_RETRIES) { -- mutex_unlock(&ubi->buf_mutex); - ubi_free_vid_hdr(ubi, vid_hdr); - return err; - } -@@ -578,7 +578,7 @@ - /** - * ubi_eba_write_leb - write data to dynamic volume. - * @ubi: UBI device description object -- * @vol_id: volume ID -+ * @vol: volume description object - * @lnum: logical eraseblock number - * @buf: the data to write - * @offset: offset within the logical eraseblock where to write -@@ -586,15 +586,14 @@ - * @dtype: data type - * - * This function writes data to logical eraseblock @lnum of a dynamic volume -- * @vol_id. Returns zero in case of success and a negative error code in case -+ * @vol. Returns zero in case of success and a negative error code in case - * of failure. In case of error, it is possible that something was still - * written to the flash media, but may be some garbage. 
- */ --int ubi_eba_write_leb(struct ubi_device *ubi, int vol_id, int lnum, -+int ubi_eba_write_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum, - const void *buf, int offset, int len, int dtype) - { -- int idx = vol_id2idx(ubi, vol_id), err, pnum, tries = 0; -- struct ubi_volume *vol = ubi->volumes[idx]; -+ int err, pnum, tries = 0, vol_id = vol->vol_id; - struct ubi_vid_hdr *vid_hdr; - - if (ubi->ro_mode) -@@ -613,7 +612,8 @@ - if (err) { - ubi_warn("failed to write data to PEB %d", pnum); - if (err == -EIO && ubi->bad_allowed) -- err = recover_peb(ubi, pnum, vol_id, lnum, buf, offset, len); -+ err = recover_peb(ubi, pnum, vol_id, lnum, buf, -+ offset, len); - if (err) - ubi_ro_mode(ubi); - } -@@ -656,11 +656,14 @@ - goto write_error; - } - -- err = ubi_io_write_data(ubi, buf, pnum, offset, len); -- if (err) { -- ubi_warn("failed to write %d bytes at offset %d of LEB %d:%d, " -- "PEB %d", len, offset, vol_id, lnum, pnum); -- goto write_error; -+ if (len) { -+ err = ubi_io_write_data(ubi, buf, pnum, offset, len); -+ if (err) { -+ ubi_warn("failed to write %d bytes at offset %d of " -+ "LEB %d:%d, PEB %d", len, offset, vol_id, -+ lnum, pnum); -+ goto write_error; -+ } - } - - vol->eba_tbl[lnum] = pnum; -@@ -698,7 +701,7 @@ - /** - * ubi_eba_write_leb_st - write data to static volume. - * @ubi: UBI device description object -- * @vol_id: volume ID -+ * @vol: volume description object - * @lnum: logical eraseblock number - * @buf: data to write - * @len: how many bytes to write -@@ -706,7 +709,7 @@ - * @used_ebs: how many logical eraseblocks will this volume contain - * - * This function writes data to logical eraseblock @lnum of static volume -- * @vol_id. The @used_ebs argument should contain total number of logical -+ * @vol. The @used_ebs argument should contain total number of logical - * eraseblock in this static volume. - * - * When writing to the last logical eraseblock, the @len argument doesn't have -@@ -718,12 +721,11 @@ - * volumes. 
This function returns zero in case of success and a negative error - * code in case of failure. - */ --int ubi_eba_write_leb_st(struct ubi_device *ubi, int vol_id, int lnum, -- const void *buf, int len, int dtype, int used_ebs) -+int ubi_eba_write_leb_st(struct ubi_device *ubi, struct ubi_volume *vol, -+ int lnum, const void *buf, int len, int dtype, -+ int used_ebs) - { -- int err, pnum, tries = 0, data_size = len; -- int idx = vol_id2idx(ubi, vol_id); -- struct ubi_volume *vol = ubi->volumes[idx]; -+ int err, pnum, tries = 0, data_size = len, vol_id = vol->vol_id; - struct ubi_vid_hdr *vid_hdr; - uint32_t crc; - -@@ -734,7 +736,7 @@ - /* If this is the last LEB @len may be unaligned */ - len = ALIGN(data_size, ubi->min_io_size); - else -- ubi_assert(len % ubi->min_io_size == 0); -+ ubi_assert(!(len & (ubi->min_io_size - 1))); - - vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS); - if (!vid_hdr) -@@ -819,7 +821,7 @@ - /* - * ubi_eba_atomic_leb_change - change logical eraseblock atomically. - * @ubi: UBI device description object -- * @vol_id: volume ID -+ * @vol: volume description object - * @lnum: logical eraseblock number - * @buf: data to write - * @len: how many bytes to write -@@ -834,17 +836,27 @@ - * UBI reserves one LEB for the "atomic LEB change" operation, so only one - * LEB change may be done at a time. This is ensured by @ubi->alc_mutex. - */ --int ubi_eba_atomic_leb_change(struct ubi_device *ubi, int vol_id, int lnum, -- const void *buf, int len, int dtype) -+int ubi_eba_atomic_leb_change(struct ubi_device *ubi, struct ubi_volume *vol, -+ int lnum, const void *buf, int len, int dtype) - { -- int err, pnum, tries = 0, idx = vol_id2idx(ubi, vol_id); -- struct ubi_volume *vol = ubi->volumes[idx]; -+ int err, pnum, tries = 0, vol_id = vol->vol_id; - struct ubi_vid_hdr *vid_hdr; - uint32_t crc; - - if (ubi->ro_mode) - return -EROFS; - -+ if (len == 0) { -+ /* -+ * Special case when data length is zero. 
In this case the LEB -+ * has to be unmapped and mapped somewhere else. -+ */ -+ err = ubi_eba_unmap_leb(ubi, vol, lnum); -+ if (err) -+ return err; -+ return ubi_eba_write_leb(ubi, vol, lnum, NULL, 0, 0, dtype); -+ } -+ - vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS); - if (!vid_hdr) - return -ENOMEM; -@@ -891,7 +903,7 @@ - } - - if (vol->eba_tbl[lnum] >= 0) { -- err = ubi_wl_put_peb(ubi, vol->eba_tbl[lnum], 1); -+ err = ubi_wl_put_peb(ubi, vol->eba_tbl[lnum], 0); - if (err) - goto out_leb_unlock; - } -@@ -928,20 +940,6 @@ - } - - /** -- * ltree_entry_ctor - lock tree entries slab cache constructor. -- * @obj: the lock-tree entry to construct -- * @cache: the lock tree entry slab cache -- * @flags: constructor flags -- */ --static void ltree_entry_ctor(struct kmem_cache *cache, void *obj) --{ -- struct ltree_entry *le = obj; -- -- le->users = 0; -- init_rwsem(&le->mutex); --} -- --/** - * ubi_eba_copy_leb - copy logical eraseblock. - * @ubi: UBI device description object - * @from: physical eraseblock number from where to copy -@@ -950,14 +948,20 @@ - * - * This function copies logical eraseblock from physical eraseblock @from to - * physical eraseblock @to. The @vid_hdr buffer may be changed by this -- * function. Returns zero in case of success, %UBI_IO_BITFLIPS if the operation -- * was canceled because bit-flips were detected at the target PEB, and a -- * negative error code in case of failure. -+ * function. Returns: -+ * o %0 in case of success; -+ * o %1 if the operation was canceled because the volume is being deleted -+ * or because the PEB was put meanwhile; -+ * o %2 if the operation was canceled because there was a write error to the -+ * target PEB; -+ * o %-EAGAIN if the operation was canceled because a bit-flip was detected -+ * in the target PEB; -+ * o a negative error code in case of failure. 
- */ - int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, - struct ubi_vid_hdr *vid_hdr) - { -- int err, vol_id, lnum, data_size, aldata_size, pnum, idx; -+ int err, vol_id, lnum, data_size, aldata_size, idx; - struct ubi_volume *vol; - uint32_t crc; - -@@ -973,51 +977,67 @@ - data_size = aldata_size = - ubi->leb_size - be32_to_cpu(vid_hdr->data_pad); - -- /* -- * We do not want anybody to write to this logical eraseblock while we -- * are moving it, so we lock it. -- */ -- err = leb_write_lock(ubi, vol_id, lnum); -- if (err) -- return err; -- -- mutex_lock(&ubi->buf_mutex); -- -- /* -- * But the logical eraseblock might have been put by this time. -- * Cancel if it is true. -- */ - idx = vol_id2idx(ubi, vol_id); -- -+ spin_lock(&ubi->volumes_lock); - /* -- * We may race with volume deletion/re-size, so we have to hold -- * @ubi->volumes_lock. -+ * Note, we may race with volume deletion, which means that the volume -+ * this logical eraseblock belongs to might be being deleted. Since the -+ * volume deletion un-maps all the volume's logical eraseblocks, it will -+ * be locked in 'ubi_wl_put_peb()' and wait for the WL worker to finish. - */ -- spin_lock(&ubi->volumes_lock); - vol = ubi->volumes[idx]; - if (!vol) { -- dbg_eba("volume %d was removed meanwhile", vol_id); -+ /* No need to do further work, cancel */ -+ dbg_eba("volume %d is being removed, cancel", vol_id); - spin_unlock(&ubi->volumes_lock); -- goto out_unlock; -+ return 1; - } -+ spin_unlock(&ubi->volumes_lock); - -- pnum = vol->eba_tbl[lnum]; -- if (pnum != from) { -- dbg_eba("LEB %d:%d is no longer mapped to PEB %d, mapped to " -- "PEB %d, cancel", vol_id, lnum, from, pnum); -- spin_unlock(&ubi->volumes_lock); -- goto out_unlock; -+ /* -+ * We do not want anybody to write to this logical eraseblock while we -+ * are moving it, so lock it. -+ * -+ * Note, we are using non-waiting locking here, because we cannot sleep -+ * on the LEB, since it may cause deadlocks. 
Indeed, imagine a task is -+ * unmapping the LEB which is mapped to the PEB we are going to move -+ * (@from). This task locks the LEB and goes sleep in the -+ * 'ubi_wl_put_peb()' function on the @ubi->move_mutex. In turn, we are -+ * holding @ubi->move_mutex and go sleep on the LEB lock. So, if the -+ * LEB is already locked, we just do not move it and return %1. -+ */ -+ err = leb_write_trylock(ubi, vol_id, lnum); -+ if (err) { -+ dbg_eba("contention on LEB %d:%d, cancel", vol_id, lnum); -+ return err; - } -- spin_unlock(&ubi->volumes_lock); - -- /* OK, now the LEB is locked and we can safely start moving it */ -+ /* -+ * The LEB might have been put meanwhile, and the task which put it is -+ * probably waiting on @ubi->move_mutex. No need to continue the work, -+ * cancel it. -+ */ -+ if (vol->eba_tbl[lnum] != from) { -+ dbg_eba("LEB %d:%d is no longer mapped to PEB %d, mapped to " -+ "PEB %d, cancel", vol_id, lnum, from, -+ vol->eba_tbl[lnum]); -+ err = 1; -+ goto out_unlock_leb; -+ } - -+ /* -+ * OK, now the LEB is locked and we can safely start moving it. Since -+ * this function utilizes the @ubi->peb1_buf buffer which is shared -+ * with some other functions, so lock the buffer by taking the -+ * @ubi->buf_mutex. 
-+ */ -+ mutex_lock(&ubi->buf_mutex); - dbg_eba("read %d bytes of data", aldata_size); - err = ubi_io_read_data(ubi, ubi->peb_buf1, from, 0, aldata_size); - if (err && err != UBI_IO_BITFLIPS) { - ubi_warn("error %d while reading data from PEB %d", - err, from); -- goto out_unlock; -+ goto out_unlock_buf; - } - - /* -@@ -1052,8 +1072,11 @@ - vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi)); - - err = ubi_io_write_vid_hdr(ubi, to, vid_hdr); -- if (err) -- goto out_unlock; -+ if (err) { -+ if (err == -EIO) -+ err = 2; -+ goto out_unlock_buf; -+ } - - cond_resched(); - -@@ -1062,13 +1085,18 @@ - if (err) { - if (err != UBI_IO_BITFLIPS) - ubi_warn("cannot read VID header back from PEB %d", to); -- goto out_unlock; -+ else -+ err = -EAGAIN; -+ goto out_unlock_buf; - } - - if (data_size > 0) { - err = ubi_io_write_data(ubi, ubi->peb_buf1, to, 0, aldata_size); -- if (err) -- goto out_unlock; -+ if (err) { -+ if (err == -EIO) -+ err = 2; -+ goto out_unlock_buf; -+ } - - cond_resched(); - -@@ -1082,29 +1110,33 @@ - if (err != UBI_IO_BITFLIPS) - ubi_warn("cannot read data back from PEB %d", - to); -- goto out_unlock; -+ else -+ err = -EAGAIN; -+ goto out_unlock_buf; - } - - cond_resched(); - - if (memcmp(ubi->peb_buf1, ubi->peb_buf2, aldata_size)) { -- ubi_warn("read data back from PEB %d - it is different", -- to); -- goto out_unlock; -+ ubi_warn("read data back from PEB %d and it is " -+ "different", to); -+ err = -EINVAL; -+ goto out_unlock_buf; - } - } - - ubi_assert(vol->eba_tbl[lnum] == from); - vol->eba_tbl[lnum] = to; - --out_unlock: -+out_unlock_buf: - mutex_unlock(&ubi->buf_mutex); -+out_unlock_leb: - leb_write_unlock(ubi, vol_id, lnum); - return err; - } - - /** -- * ubi_eba_init_scan - initialize the EBA unit using scanning information. -+ * ubi_eba_init_scan - initialize the EBA sub-system using scanning information. 
- * @ubi: UBI device description object - * @si: scanning information - * -@@ -1119,20 +1151,12 @@ - struct ubi_scan_leb *seb; - struct rb_node *rb; - -- dbg_eba("initialize EBA unit"); -+ dbg_eba("initialize EBA sub-system"); - - spin_lock_init(&ubi->ltree_lock); - mutex_init(&ubi->alc_mutex); - ubi->ltree = RB_ROOT; - -- if (ubi_devices_cnt == 0) { -- ltree_slab = kmem_cache_create("ubi_ltree_slab", -- sizeof(struct ltree_entry), 0, -- 0, <ree_entry_ctor); -- if (!ltree_slab) -- return -ENOMEM; -- } -- - ubi->global_sqnum = si->max_sqnum + 1; - num_volumes = ubi->vtbl_slots + UBI_INT_VOL_COUNT; - -@@ -1168,6 +1192,15 @@ - } - } - -+ if (ubi->avail_pebs < EBA_RESERVED_PEBS) { -+ ubi_err("no enough physical eraseblocks (%d, need %d)", -+ ubi->avail_pebs, EBA_RESERVED_PEBS); -+ err = -ENOSPC; -+ goto out_free; -+ } -+ ubi->avail_pebs -= EBA_RESERVED_PEBS; -+ ubi->rsvd_pebs += EBA_RESERVED_PEBS; -+ - if (ubi->bad_allowed) { - ubi_calculate_reserved(ubi); - -@@ -1184,16 +1217,7 @@ - ubi->rsvd_pebs += ubi->beb_rsvd_pebs; - } - -- if (ubi->avail_pebs < EBA_RESERVED_PEBS) { -- ubi_err("no enough physical eraseblocks (%d, need %d)", -- ubi->avail_pebs, EBA_RESERVED_PEBS); -- err = -ENOSPC; -- goto out_free; -- } -- ubi->avail_pebs -= EBA_RESERVED_PEBS; -- ubi->rsvd_pebs += EBA_RESERVED_PEBS; -- -- dbg_eba("EBA unit is initialized"); -+ dbg_eba("EBA sub-system is initialized"); - return 0; - - out_free: -@@ -1202,26 +1226,5 @@ - continue; - kfree(ubi->volumes[i]->eba_tbl); - } -- if (ubi_devices_cnt == 0) -- kmem_cache_destroy(ltree_slab); - return err; - } -- --/** -- * ubi_eba_close - close EBA unit. 
-- * @ubi: UBI device description object -- */ --void ubi_eba_close(const struct ubi_device *ubi) --{ -- int i, num_volumes = ubi->vtbl_slots + UBI_INT_VOL_COUNT; -- -- dbg_eba("close EBA unit"); -- -- for (i = 0; i < num_volumes; i++) { -- if (!ubi->volumes[i]) -- continue; -- kfree(ubi->volumes[i]->eba_tbl); -- } -- if (ubi_devices_cnt == 1) -- kmem_cache_destroy(ltree_slab); --} -diff -Nurd linux-2.6.24.orig/drivers/mtd/ubi/gluebi.c linux-2.6.24/drivers/mtd/ubi/gluebi.c ---- linux-2.6.24.orig/drivers/mtd/ubi/gluebi.c 2009-04-17 09:45:11.000000000 +0200 -+++ linux-2.6.24/drivers/mtd/ubi/gluebi.c 2009-04-17 09:49:26.000000000 +0200 +--- ubifs-v2.6.24/drivers/mtd/ubi/gluebi.c 2009-04-07 17:14:47.000000000 +0200 ++++ linux-2.6.24/drivers/mtd/ubi/gluebi.c 2009-04-24 13:08:55.000000000 +0200 @@ -28,7 +28,6 @@ * eraseblock size is equivalent to the logical eraseblock size of the volume. */ --#include <asm/div64.h> - #include "ubi.h" - - /** -@@ -109,9 +108,8 @@ - int err = 0, lnum, offs, total_read; - struct ubi_volume *vol; - struct ubi_device *ubi; -- uint64_t tmp = from; - -- dbg_msg("read %zd bytes from offset %lld", len, from); -+ dbg_gen("read %zd bytes from offset %lld", len, from); - - if (len < 0 || from < 0 || from + len > mtd->size) - return -EINVAL; -@@ -119,9 +117,7 @@ - vol = container_of(mtd, struct ubi_volume, gluebi_mtd); - ubi = vol->ubi; - -- offs = do_div(tmp, mtd->erasesize); -- lnum = tmp; -- -+ lnum = div_u64_rem(from, mtd->erasesize, &offs); - total_read = len; - while (total_read) { - size_t to_read = mtd->erasesize - offs; -@@ -129,8 +125,7 @@ - if (to_read > total_read) - to_read = total_read; - -- err = ubi_eba_read_leb(ubi, vol->vol_id, lnum, buf, offs, -- to_read, 0); -+ err = ubi_eba_read_leb(ubi, vol, lnum, buf, offs, to_read, 0); - if (err) - break; - -@@ -161,9 +156,8 @@ - int err = 0, lnum, offs, total_written; - struct ubi_volume *vol; - struct ubi_device *ubi; -- uint64_t tmp = to; - -- dbg_msg("write %zd bytes to offset %lld", len, 
to); -+ dbg_gen("write %zd bytes to offset %lld", len, to); - - if (len < 0 || to < 0 || len + to > mtd->size) - return -EINVAL; -@@ -174,8 +168,7 @@ - if (ubi->ro_mode) - return -EROFS; - -- offs = do_div(tmp, mtd->erasesize); -- lnum = tmp; -+ lnum = div_u64_rem(to, mtd->erasesize, &offs); - - if (len % mtd->writesize || offs % mtd->writesize) - return -EINVAL; -@@ -187,8 +180,8 @@ - if (to_write > total_written) - to_write = total_written; - -- err = ubi_eba_write_leb(ubi, vol->vol_id, lnum, buf, offs, -- to_write, UBI_UNKNOWN); -+ err = ubi_eba_write_leb(ubi, vol, lnum, buf, offs, to_write, -+ UBI_UNKNOWN); - if (err) - break; - -@@ -216,7 +209,7 @@ - struct ubi_volume *vol; - struct ubi_device *ubi; - -- dbg_msg("erase %u bytes at offset %u", instr->len, instr->addr); -+ dbg_gen("erase %u bytes at offset %u", instr->len, instr->addr); - - if (instr->addr < 0 || instr->addr > mtd->size - mtd->erasesize) - return -EINVAL; -@@ -237,7 +230,7 @@ - return -EROFS; - - for (i = 0; i < count; i++) { -- err = ubi_eba_unmap_leb(ubi, vol->vol_id, lnum + i); -+ err = ubi_eba_unmap_leb(ubi, vol, lnum + i); - if (err) - goto out_err; - } -@@ -250,8 +243,8 @@ - if (err) - goto out_err; - -- instr->state = MTD_ERASE_DONE; -- mtd_erase_callback(instr); -+ instr->state = MTD_ERASE_DONE; -+ mtd_erase_callback(instr); - return 0; - - out_err: -@@ -292,19 +285,20 @@ - /* - * In case of dynamic volume, MTD device size is just volume size. In - * case of a static volume the size is equivalent to the amount of data -- * bytes, which is zero at this moment and will be changed after volume -- * update. -+ * bytes. 
- */ - if (vol->vol_type == UBI_DYNAMIC_VOLUME) - mtd->size = vol->usable_leb_size * vol->reserved_pebs; -+ else -+ mtd->size = vol->used_bytes; - - if (add_mtd_device(mtd)) { -- ubi_err("cannot not add MTD device\n"); -+ ubi_err("cannot not add MTD device"); - kfree(mtd->name); - return -ENFILE; - } - -- dbg_msg("added mtd%d (\"%s\"), size %u, EB size %u", -+ dbg_gen("added mtd%d (\"%s\"), size %u, EB size %u", - mtd->index, mtd->name, mtd->size, mtd->erasesize); - return 0; - } -@@ -322,7 +316,7 @@ - int err; - struct mtd_info *mtd = &vol->gluebi_mtd; - -- dbg_msg("remove mtd%d", mtd->index); -+ dbg_gen("remove mtd%d", mtd->index); - err = del_mtd_device(mtd); - if (err) - return err; -diff -Nurd linux-2.6.24.orig/drivers/mtd/ubi/io.c linux-2.6.24/drivers/mtd/ubi/io.c ---- linux-2.6.24.orig/drivers/mtd/ubi/io.c 2009-04-17 09:45:11.000000000 +0200 -+++ linux-2.6.24/drivers/mtd/ubi/io.c 2009-04-17 09:49:26.000000000 +0200 -@@ -20,15 +20,15 @@ - */ - - /* -- * UBI input/output unit. -+ * UBI input/output sub-system. - * -- * This unit provides a uniform way to work with all kinds of the underlying -- * MTD devices. It also implements handy functions for reading and writing UBI -- * headers. -+ * This sub-system provides a uniform way to work with all kinds of the -+ * underlying MTD devices. It also implements handy functions for reading and -+ * writing UBI headers. - * - * We are trying to have a paranoid mindset and not to trust to what we read -- * from the flash media in order to be more secure and robust. So this unit -- * validates every single header it reads from the flash media. -+ * from the flash media in order to be more secure and robust. So this -+ * sub-system validates every single header it reads from the flash media. - * - * Some words about how the eraseblock headers are stored. - * -@@ -79,11 +79,11 @@ - * 512-byte chunks, we have to allocate one more buffer and copy our VID header - * to offset 448 of this buffer. 
- * -- * The I/O unit does the following trick in order to avoid this extra copy. -- * It always allocates a @ubi->vid_hdr_alsize bytes buffer for the VID header -- * and returns a pointer to offset @ubi->vid_hdr_shift of this buffer. When the -- * VID header is being written out, it shifts the VID header pointer back and -- * writes the whole sub-page. -+ * The I/O sub-system does the following trick in order to avoid this extra -+ * copy. It always allocates a @ubi->vid_hdr_alsize bytes buffer for the VID -+ * header and returns a pointer to offset @ubi->vid_hdr_shift of this buffer. -+ * When the VID header is being written out, it shifts the VID header pointer -+ * back and writes the whole sub-page. - */ - - #include <linux/crc32.h> -@@ -156,15 +156,19 @@ - /* - * -EUCLEAN is reported if there was a bit-flip which - * was corrected, so this is harmless. -+ * -+ * We do not report about it here unless debugging is -+ * enabled. A corresponding message will be printed -+ * later, when it is has been scrubbed. - */ -- ubi_msg("fixable bit-flip detected at PEB %d", pnum); -+ dbg_msg("fixable bit-flip detected at PEB %d", pnum); - ubi_assert(len == read); - return UBI_IO_BITFLIPS; - } - - if (read != len && retries++ < UBI_IO_RETRIES) { -- dbg_io("error %d while reading %d bytes from PEB %d:%d, " -- "read only %zd bytes, retry", -+ dbg_io("error %d while reading %d bytes from PEB %d:%d," -+ " read only %zd bytes, retry", - err, len, pnum, offset, read); - yield(); - goto retry; -@@ -173,11 +177,21 @@ - ubi_err("error %d while reading %d bytes from PEB %d:%d, " - "read %zd bytes", err, len, pnum, offset, read); - ubi_dbg_dump_stack(); -+ -+ /* -+ * The driver should never return -EBADMSG if it failed to read -+ * all the requested data. But some buggy drivers might do -+ * this, so we change it to -EIO. 
-+ */ -+ if (read != len && err == -EBADMSG) { -+ ubi_assert(0); -+ err = -EIO; -+ } - } else { - ubi_assert(len == read); - - if (ubi_dbg_is_bitflip()) { -- dbg_msg("bit-flip (emulated)"); -+ dbg_gen("bit-flip (emulated)"); - err = UBI_IO_BITFLIPS; - } - } -@@ -381,6 +395,7 @@ - { - int err, i, patt_count; - -+ ubi_msg("run torture test for PEB %d", pnum); - patt_count = ARRAY_SIZE(patterns); - ubi_assert(patt_count > 0); - -@@ -424,6 +439,7 @@ - } - - err = patt_count; -+ ubi_msg("PEB %d passed torture test, do not mark it a bad", pnum); - - out: - mutex_unlock(&ubi->buf_mutex); -@@ -667,6 +683,9 @@ - if (verbose) - ubi_warn("no EC header found at PEB %d, " - "only 0xFF bytes", pnum); -+ else if (UBI_IO_DEBUG) -+ dbg_msg("no EC header found at PEB %d, " -+ "only 0xFF bytes", pnum); - return UBI_IO_PEB_EMPTY; - } - -@@ -678,7 +697,9 @@ - ubi_warn("bad magic number at PEB %d: %08x instead of " - "%08x", pnum, magic, UBI_EC_HDR_MAGIC); - ubi_dbg_dump_ec_hdr(ec_hdr); -- } -+ } else if (UBI_IO_DEBUG) -+ dbg_msg("bad magic number at PEB %d: %08x instead of " -+ "%08x", pnum, magic, UBI_EC_HDR_MAGIC); - return UBI_IO_BAD_EC_HDR; - } - -@@ -687,10 +708,12 @@ - - if (hdr_crc != crc) { - if (verbose) { -- ubi_warn("bad EC header CRC at PEB %d, calculated %#08x," -- " read %#08x", pnum, crc, hdr_crc); -+ ubi_warn("bad EC header CRC at PEB %d, calculated " -+ "%#08x, read %#08x", pnum, crc, hdr_crc); - ubi_dbg_dump_ec_hdr(ec_hdr); -- } -+ } else if (UBI_IO_DEBUG) -+ dbg_msg("bad EC header CRC at PEB %d, calculated " -+ "%#08x, read %#08x", pnum, crc, hdr_crc); - return UBI_IO_BAD_EC_HDR; - } - -@@ -940,6 +963,9 @@ - if (verbose) - ubi_warn("no VID header found at PEB %d, " - "only 0xFF bytes", pnum); -+ else if (UBI_IO_DEBUG) -+ dbg_msg("no VID header found at PEB %d, " -+ "only 0xFF bytes", pnum); - return UBI_IO_PEB_FREE; - } - -@@ -951,7 +977,9 @@ - ubi_warn("bad magic number at PEB %d: %08x instead of " - "%08x", pnum, magic, UBI_VID_HDR_MAGIC); - 
ubi_dbg_dump_vid_hdr(vid_hdr); -- } -+ } else if (UBI_IO_DEBUG) -+ dbg_msg("bad magic number at PEB %d: %08x instead of " -+ "%08x", pnum, magic, UBI_VID_HDR_MAGIC); - return UBI_IO_BAD_VID_HDR; - } - -@@ -963,7 +991,9 @@ - ubi_warn("bad CRC at PEB %d, calculated %#08x, " - "read %#08x", pnum, crc, hdr_crc); - ubi_dbg_dump_vid_hdr(vid_hdr); -- } -+ } else if (UBI_IO_DEBUG) -+ dbg_msg("bad CRC at PEB %d, calculated %#08x, " -+ "read %#08x", pnum, crc, hdr_crc); - return UBI_IO_BAD_VID_HDR; - } - -@@ -1004,7 +1034,7 @@ - - err = paranoid_check_peb_ec_hdr(ubi, pnum); - if (err) -- return err > 0 ? -EINVAL: err; -+ return err > 0 ? -EINVAL : err; - - vid_hdr->magic = cpu_to_be32(UBI_VID_HDR_MAGIC); - vid_hdr->version = UBI_VERSION; -@@ -1081,8 +1111,7 @@ - } - - /** -- * paranoid_check_peb_ec_hdr - check that the erase counter header of a -- * physical eraseblock is in-place and is all right. -+ * paranoid_check_peb_ec_hdr - check erase counter header. - * @ubi: UBI device description object - * @pnum: the physical eraseblock number to check - * -@@ -1160,8 +1189,7 @@ - } - - /** -- * paranoid_check_peb_vid_hdr - check that the volume identifier header of a -- * physical eraseblock is in-place and is all right. -+ * paranoid_check_peb_vid_hdr - check volume identifier header. 
- * @ubi: UBI device description object - * @pnum: the physical eraseblock number to check - * -@@ -1242,7 +1270,7 @@ - - fail: - ubi_err("paranoid check failed for PEB %d", pnum); -- dbg_msg("hex dump of the %d-%d region", offset, offset + len); -+ ubi_msg("hex dump of the %d-%d region", offset, offset + len); - print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, - ubi->dbg_peb_buf, len, 1); - err = 1; -diff -Nurd linux-2.6.24.orig/drivers/mtd/ubi/kapi.c linux-2.6.24/drivers/mtd/ubi/kapi.c ---- linux-2.6.24.orig/drivers/mtd/ubi/kapi.c 2009-04-17 09:45:11.000000000 +0200 -+++ linux-2.6.24/drivers/mtd/ubi/kapi.c 2009-04-17 09:49:26.000000000 +0200 -@@ -30,23 +30,27 @@ - * @ubi_num: UBI device number - * @di: the information is stored here - * -- * This function returns %0 in case of success and a %-ENODEV if there is no -- * such UBI device. -+ * This function returns %0 in case of success, %-EINVAL if the UBI device -+ * number is invalid, and %-ENODEV if there is no such UBI device. 
- */ - int ubi_get_device_info(int ubi_num, struct ubi_device_info *di) - { -- const struct ubi_device *ubi; -+ struct ubi_device *ubi; - -- if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES || -- !ubi_devices[ubi_num]) -+ if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES) -+ return -EINVAL; -+ -+ ubi = ubi_get_device(ubi_num); -+ if (!ubi) - return -ENODEV; - -- ubi = ubi_devices[ubi_num]; - di->ubi_num = ubi->ubi_num; - di->leb_size = ubi->leb_size; - di->min_io_size = ubi->min_io_size; - di->ro_mode = ubi->ro_mode; -- di->cdev = MKDEV(ubi->major, 0); -+ di->cdev = ubi->cdev.dev; -+ -+ ubi_put_device(ubi); - return 0; - } - EXPORT_SYMBOL_GPL(ubi_get_device_info); -@@ -73,7 +77,7 @@ - vi->usable_leb_size = vol->usable_leb_size; - vi->name_len = vol->name_len; - vi->name = vol->name; -- vi->cdev = MKDEV(ubi->major, vi->vol_id + 1); -+ vi->cdev = vol->cdev.dev; - } - EXPORT_SYMBOL_GPL(ubi_get_volume_info); - -@@ -102,39 +106,41 @@ - struct ubi_device *ubi; - struct ubi_volume *vol; - -- dbg_msg("open device %d volume %d, mode %d", ubi_num, vol_id, mode); -- -- err = -ENODEV; -- if (ubi_num < 0) -- return ERR_PTR(err); -- -- ubi = ubi_devices[ubi_num]; -- -- if (!try_module_get(THIS_MODULE)) -- return ERR_PTR(err); -+ dbg_gen("open device %d volume %d, mode %d", ubi_num, vol_id, mode); - -- if (ubi_num >= UBI_MAX_DEVICES || !ubi) -- goto out_put; -+ if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES) -+ return ERR_PTR(-EINVAL); - -- err = -EINVAL; -- if (vol_id < 0 || vol_id >= ubi->vtbl_slots) -- goto out_put; - if (mode != UBI_READONLY && mode != UBI_READWRITE && - mode != UBI_EXCLUSIVE) -- goto out_put; -+ return ERR_PTR(-EINVAL); -+ -+ /* -+ * First of all, we have to get the UBI device to prevent its removal. 
-+ */ -+ ubi = ubi_get_device(ubi_num); -+ if (!ubi) -+ return ERR_PTR(-ENODEV); -+ -+ if (vol_id < 0 || vol_id >= ubi->vtbl_slots) { -+ err = -EINVAL; -+ goto out_put_ubi; -+ } - - desc = kmalloc(sizeof(struct ubi_volume_desc), GFP_KERNEL); - if (!desc) { - err = -ENOMEM; -- goto out_put; -+ goto out_put_ubi; - } - -+ err = -ENODEV; -+ if (!try_module_get(THIS_MODULE)) -+ goto out_free; -+ - spin_lock(&ubi->volumes_lock); - vol = ubi->volumes[vol_id]; -- if (!vol) { -- err = -ENODEV; -+ if (!vol) - goto out_unlock; -- } - - err = -EBUSY; - switch (mode) { -@@ -156,21 +162,19 @@ - vol->exclusive = 1; - break; - } -+ get_device(&vol->dev); -+ vol->ref_count += 1; - spin_unlock(&ubi->volumes_lock); - - desc->vol = vol; - desc->mode = mode; - -- /* -- * To prevent simultaneous checks of the same volume we use @vtbl_mutex, -- * although it is not the purpose it was introduced for. -- */ -- mutex_lock(&ubi->vtbl_mutex); -+ mutex_lock(&ubi->ckvol_mutex); - if (!vol->checked) { - /* This is the first open - check the volume */ - err = ubi_check_volume(ubi, vol_id); - if (err < 0) { -- mutex_unlock(&ubi->vtbl_mutex); -+ mutex_unlock(&ubi->ckvol_mutex); - ubi_close_volume(desc); - return ERR_PTR(err); - } -@@ -181,14 +185,17 @@ - } - vol->checked = 1; - } -- mutex_unlock(&ubi->vtbl_mutex); -+ mutex_unlock(&ubi->ckvol_mutex); -+ - return desc; - - out_unlock: - spin_unlock(&ubi->volumes_lock); -- kfree(desc); --out_put: - module_put(THIS_MODULE); -+out_free: -+ kfree(desc); -+out_put_ubi: -+ ubi_put_device(ubi); - return ERR_PTR(err); - } - EXPORT_SYMBOL_GPL(ubi_open_volume); -@@ -205,10 +212,10 @@ - int mode) - { - int i, vol_id = -1, len; -- struct ubi_volume_desc *ret; - struct ubi_device *ubi; -+ struct ubi_volume_desc *ret; - -- dbg_msg("open volume %s, mode %d", name, mode); -+ dbg_gen("open volume %s, mode %d", name, mode); - - if (!name) - return ERR_PTR(-EINVAL); -@@ -217,14 +224,12 @@ - if (len > UBI_VOL_NAME_MAX) - return ERR_PTR(-EINVAL); - -- ret = 
ERR_PTR(-ENODEV); -- if (!try_module_get(THIS_MODULE)) -- return ret; -- -- if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES || !ubi_devices[ubi_num]) -- goto out_put; -+ if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES) -+ return ERR_PTR(-EINVAL); - -- ubi = ubi_devices[ubi_num]; -+ ubi = ubi_get_device(ubi_num); -+ if (!ubi) -+ return ERR_PTR(-ENODEV); - - spin_lock(&ubi->volumes_lock); - /* Walk all volumes of this UBI device */ -@@ -238,13 +243,16 @@ - } - spin_unlock(&ubi->volumes_lock); - -- if (vol_id < 0) -- goto out_put; -- -- ret = ubi_open_volume(ubi_num, vol_id, mode); -+ if (vol_id >= 0) -+ ret = ubi_open_volume(ubi_num, vol_id, mode); -+ else -+ ret = ERR_PTR(-ENODEV); - --out_put: -- module_put(THIS_MODULE); -+ /* -+ * We should put the UBI device even in case of success, because -+ * 'ubi_open_volume()' took a reference as well. -+ */ -+ ubi_put_device(ubi); - return ret; - } - EXPORT_SYMBOL_GPL(ubi_open_volume_nm); -@@ -256,10 +264,11 @@ - void ubi_close_volume(struct ubi_volume_desc *desc) - { - struct ubi_volume *vol = desc->vol; -+ struct ubi_device *ubi = vol->ubi; - -- dbg_msg("close volume %d, mode %d", vol->vol_id, desc->mode); -+ dbg_gen("close volume %d, mode %d", vol->vol_id, desc->mode); - -- spin_lock(&vol->ubi->volumes_lock); -+ spin_lock(&ubi->volumes_lock); - switch (desc->mode) { - case UBI_READONLY: - vol->readers -= 1; -@@ -270,9 +279,12 @@ - case UBI_EXCLUSIVE: - vol->exclusive = 0; - } -- spin_unlock(&vol->ubi->volumes_lock); -+ vol->ref_count -= 1; -+ spin_unlock(&ubi->volumes_lock); - - kfree(desc); -+ put_device(&vol->dev); -+ ubi_put_device(ubi); - module_put(THIS_MODULE); - } - EXPORT_SYMBOL_GPL(ubi_close_volume); -@@ -311,7 +323,7 @@ - struct ubi_device *ubi = vol->ubi; - int err, vol_id = vol->vol_id; - -- dbg_msg("read %d bytes from LEB %d:%d:%d", len, vol_id, lnum, offset); -+ dbg_gen("read %d bytes from LEB %d:%d:%d", len, vol_id, lnum, offset); - - if (vol_id < 0 || vol_id >= ubi->vtbl_slots || lnum < 0 || - lnum >= 
vol->used_ebs || offset < 0 || len < 0 || -@@ -332,7 +344,7 @@ - if (len == 0) - return 0; - -- err = ubi_eba_read_leb(ubi, vol_id, lnum, buf, offset, len, check); -+ err = ubi_eba_read_leb(ubi, vol, lnum, buf, offset, len, check); - if (err && err == -EBADMSG && vol->vol_type == UBI_STATIC_VOLUME) { - ubi_warn("mark volume %d as corrupted", vol_id); - vol->corrupted = 1; -@@ -376,7 +388,7 @@ - struct ubi_device *ubi = vol->ubi; - int vol_id = vol->vol_id; - -- dbg_msg("write %d bytes to LEB %d:%d:%d", len, vol_id, lnum, offset); -+ dbg_gen("write %d bytes to LEB %d:%d:%d", len, vol_id, lnum, offset); - - if (vol_id < 0 || vol_id >= ubi->vtbl_slots) - return -EINVAL; -@@ -385,8 +397,8 @@ - return -EROFS; - - if (lnum < 0 || lnum >= vol->reserved_pebs || offset < 0 || len < 0 || -- offset + len > vol->usable_leb_size || offset % ubi->min_io_size || -- len % ubi->min_io_size) -+ offset + len > vol->usable_leb_size || -+ offset & (ubi->min_io_size - 1) || len & (ubi->min_io_size - 1)) - return -EINVAL; - - if (dtype != UBI_LONGTERM && dtype != UBI_SHORTTERM && -@@ -399,7 +411,7 @@ - if (len == 0) - return 0; - -- return ubi_eba_write_leb(ubi, vol_id, lnum, buf, offset, len, dtype); -+ return ubi_eba_write_leb(ubi, vol, lnum, buf, offset, len, dtype); - } - EXPORT_SYMBOL_GPL(ubi_leb_write); - -@@ -426,7 +438,7 @@ - struct ubi_device *ubi = vol->ubi; - int vol_id = vol->vol_id; - -- dbg_msg("atomically write %d bytes to LEB %d:%d", len, vol_id, lnum); -+ dbg_gen("atomically write %d bytes to LEB %d:%d", len, vol_id, lnum); - - if (vol_id < 0 || vol_id >= ubi->vtbl_slots) - return -EINVAL; -@@ -435,7 +447,7 @@ - return -EROFS; - - if (lnum < 0 || lnum >= vol->reserved_pebs || len < 0 || -- len > vol->usable_leb_size || len % ubi->min_io_size) -+ len > vol->usable_leb_size || len & (ubi->min_io_size - 1)) - return -EINVAL; - - if (dtype != UBI_LONGTERM && dtype != UBI_SHORTTERM && -@@ -448,7 +460,7 @@ - if (len == 0) - return 0; - -- return ubi_eba_atomic_leb_change(ubi, 
vol_id, lnum, buf, len, dtype); -+ return ubi_eba_atomic_leb_change(ubi, vol, lnum, buf, len, dtype); - } - EXPORT_SYMBOL_GPL(ubi_leb_change); - -@@ -468,9 +480,9 @@ - { - struct ubi_volume *vol = desc->vol; - struct ubi_device *ubi = vol->ubi; -- int err, vol_id = vol->vol_id; -+ int err; - -- dbg_msg("erase LEB %d:%d", vol_id, lnum); -+ dbg_gen("erase LEB %d:%d", vol->vol_id, lnum); - - if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME) - return -EROFS; -@@ -481,7 +493,7 @@ - if (vol->upd_marker) - return -EBADF; - -- err = ubi_eba_unmap_leb(ubi, vol_id, lnum); -+ err = ubi_eba_unmap_leb(ubi, vol, lnum); - if (err) - return err; - -@@ -529,9 +541,8 @@ - { - struct ubi_volume *vol = desc->vol; - struct ubi_device *ubi = vol->ubi; -- int vol_id = vol->vol_id; - -- dbg_msg("unmap LEB %d:%d", vol_id, lnum); -+ dbg_gen("unmap LEB %d:%d", vol->vol_id, lnum); - - if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME) - return -EROFS; -@@ -542,11 +553,55 @@ - if (vol->upd_marker) - return -EBADF; - -- return ubi_eba_unmap_leb(ubi, vol_id, lnum); -+ return ubi_eba_unmap_leb(ubi, vol, lnum); - } - EXPORT_SYMBOL_GPL(ubi_leb_unmap); - - /** -+ * ubi_leb_map - map logical erasblock to a physical eraseblock. -+ * @desc: volume descriptor -+ * @lnum: logical eraseblock number -+ * @dtype: expected data type -+ * -+ * This function maps an un-mapped logical eraseblock @lnum to a physical -+ * eraseblock. This means, that after a successfull invocation of this -+ * function the logical eraseblock @lnum will be empty (contain only %0xFF -+ * bytes) and be mapped to a physical eraseblock, even if an unclean reboot -+ * happens. -+ * -+ * This function returns zero in case of success, %-EBADF if the volume is -+ * damaged because of an interrupted update, %-EBADMSG if the logical -+ * eraseblock is already mapped, and other negative error codes in case of -+ * other failures. 
-+ */ -+int ubi_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype) -+{ -+ struct ubi_volume *vol = desc->vol; -+ struct ubi_device *ubi = vol->ubi; -+ -+ dbg_gen("unmap LEB %d:%d", vol->vol_id, lnum); -+ -+ if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME) -+ return -EROFS; -+ -+ if (lnum < 0 || lnum >= vol->reserved_pebs) -+ return -EINVAL; -+ -+ if (dtype != UBI_LONGTERM && dtype != UBI_SHORTTERM && -+ dtype != UBI_UNKNOWN) -+ return -EINVAL; -+ -+ if (vol->upd_marker) -+ return -EBADF; -+ -+ if (vol->eba_tbl[lnum] >= 0) -+ return -EBADMSG; -+ -+ return ubi_eba_write_leb(ubi, vol, lnum, NULL, 0, 0, dtype); -+} -+EXPORT_SYMBOL_GPL(ubi_leb_map); -+ -+/** - * ubi_is_mapped - check if logical eraseblock is mapped. - * @desc: volume descriptor - * @lnum: logical eraseblock number -@@ -566,7 +621,7 @@ - { - struct ubi_volume *vol = desc->vol; - -- dbg_msg("test LEB %d:%d", vol->vol_id, lnum); -+ dbg_gen("test LEB %d:%d", vol->vol_id, lnum); - - if (lnum < 0 || lnum >= vol->reserved_pebs) - return -EINVAL; -@@ -577,3 +632,27 @@ - return vol->eba_tbl[lnum] >= 0; - } - EXPORT_SYMBOL_GPL(ubi_is_mapped); -+ -+/** -+ * ubi_sync - synchronize UBI device buffers. -+ * @ubi_num: UBI device to synchronize -+ * -+ * The underlying MTD device may cache data in hardware or in software. This -+ * function ensures the caches are flushed. Returns zero in case of success and -+ * a negative error code in case of failure. 
-+ */ -+int ubi_sync(int ubi_num) -+{ -+ struct ubi_device *ubi; -+ -+ ubi = ubi_get_device(ubi_num); -+ if (!ubi) -+ return -ENODEV; -+ -+ if (ubi->mtd->sync) -+ ubi->mtd->sync(ubi->mtd); -+ -+ ubi_put_device(ubi); -+ return 0; -+} -+EXPORT_SYMBOL_GPL(ubi_sync); -diff -Nurd linux-2.6.24.orig/drivers/mtd/ubi/misc.c linux-2.6.24/drivers/mtd/ubi/misc.c ---- linux-2.6.24.orig/drivers/mtd/ubi/misc.c 2009-04-17 09:45:11.000000000 +0200 -+++ linux-2.6.24/drivers/mtd/ubi/misc.c 2009-04-17 09:49:26.000000000 +0200 -@@ -37,7 +37,7 @@ - { - int i; - -- ubi_assert(length % ubi->min_io_size == 0); -+ ubi_assert(!(length & (ubi->min_io_size - 1))); - - for (i = length - 1; i >= 0; i--) - if (((const uint8_t *)buf)[i] != 0xFF) -@@ -79,7 +79,7 @@ - else - size = vol->usable_leb_size; - -- err = ubi_eba_read_leb(ubi, vol_id, i, buf, 0, size, 1); -+ err = ubi_eba_read_leb(ubi, vol, i, buf, 0, size, 1); - if (err) { - if (err == -EBADMSG) - err = 1; -diff -Nurd linux-2.6.24.orig/drivers/mtd/ubi/scan.c linux-2.6.24/drivers/mtd/ubi/scan.c ---- linux-2.6.24.orig/drivers/mtd/ubi/scan.c 2009-04-17 09:45:11.000000000 +0200 -+++ linux-2.6.24/drivers/mtd/ubi/scan.c 2009-04-17 09:49:26.000000000 +0200 -@@ -19,9 +19,9 @@ - */ - - /* -- * UBI scanning unit. -+ * UBI scanning sub-system. - * -- * This unit is responsible for scanning the flash media, checking UBI -+ * This sub-system is responsible for scanning the flash media, checking UBI - * headers and providing complete information about the UBI flash image. - * - * The scanning information is represented by a &struct ubi_scan_info' object. -@@ -92,29 +92,7 @@ - } - - /** -- * commit_to_mean_value - commit intermediate results to the final mean erase -- * counter value. -- * @si: scanning information -- * -- * This is a helper function which calculates partial mean erase counter mean -- * value and adds it to the resulting mean value. 
As we can work only in -- * integer arithmetic and we want to calculate the mean value of erase counter -- * accurately, we first sum erase counter values in @si->ec_sum variable and -- * count these components in @si->ec_count. If this temporary @si->ec_sum is -- * going to overflow, we calculate the partial mean value -- * (@si->ec_sum/@si->ec_count) and add it to @si->mean_ec. -- */ --static void commit_to_mean_value(struct ubi_scan_info *si) --{ -- si->ec_sum /= si->ec_count; -- if (si->ec_sum % si->ec_count >= si->ec_count / 2) -- si->mean_ec += 1; -- si->mean_ec += si->ec_sum; --} -- --/** -- * validate_vid_hdr - check that volume identifier header is correct and -- * consistent. -+ * validate_vid_hdr - check volume identifier header. - * @vid_hdr: the volume identifier header to check - * @sv: information about the volume this logical eraseblock belongs to - * @pnum: physical eraseblock number the VID header came from -@@ -123,7 +101,7 @@ - * non-zero if an inconsistency was found and zero if not. - * - * Note, UBI does sanity check of everything it reads from the flash media. -- * Most of the checks are done in the I/O unit. Here we check that the -+ * Most of the checks are done in the I/O sub-system. Here we check that the - * information in the VID header is consistent to the information in other VID - * headers of the same volume. - */ -@@ -267,40 +245,21 @@ - struct ubi_vid_hdr *vh = NULL; - unsigned long long sqnum2 = be64_to_cpu(vid_hdr->sqnum); - -- if (seb->sqnum == 0 && sqnum2 == 0) { -- long long abs, v1 = seb->leb_ver, v2 = be32_to_cpu(vid_hdr->leb_ver); -- -+ if (sqnum2 == seb->sqnum) { - /* -- * UBI constantly increases the logical eraseblock version -- * number and it can overflow. Thus, we have to bear in mind -- * that versions that are close to %0xFFFFFFFF are less then -- * versions that are close to %0. -- * -- * The UBI WL unit guarantees that the number of pending tasks -- * is not greater then %0x7FFFFFFF. 
So, if the difference -- * between any two versions is greater or equivalent to -- * %0x7FFFFFFF, there was an overflow and the logical -- * eraseblock with lower version is actually newer then the one -- * with higher version. -- * -- * FIXME: but this is anyway obsolete and will be removed at -- * some point. -+ * This must be a really ancient UBI image which has been -+ * created before sequence numbers support has been added. At -+ * that times we used 32-bit LEB versions stored in logical -+ * eraseblocks. That was before UBI got into mainline. We do not -+ * support these images anymore. Well, those images will work -+ * still work, but only if no unclean reboots happened. - */ -+ ubi_err("unsupported on-flash UBI format\n"); -+ return -EINVAL; -+ } - -- dbg_bld("using old crappy leb_ver stuff"); -- -- abs = v1 - v2; -- if (abs < 0) -- abs = -abs; -- -- if (abs < 0x7FFFFFFF) -- /* Non-overflow situation */ -- second_is_newer = (v2 > v1); -- else -- second_is_newer = (v2 < v1); -- } else -- /* Obviously the LEB with lower sequence counter is older */ -- second_is_newer = sqnum2 > seb->sqnum; -+ /* Obviously the LEB with lower sequence counter is older */ -+ second_is_newer = !!(sqnum2 > seb->sqnum); - - /* - * Now we know which copy is newer. If the copy flag of the PEB with -@@ -308,7 +267,7 @@ - * check data CRC. For the second PEB we already have the VID header, - * for the first one - we'll need to re-read it from flash. - * -- * FIXME: this may be optimized so that we wouldn't read twice. -+ * Note: this may be optimized so that we wouldn't read twice. 
- */ - - if (second_is_newer) { -@@ -360,7 +319,7 @@ - } - - err = ubi_io_read_data(ubi, buf, pnum, 0, len); -- if (err && err != UBI_IO_BITFLIPS) -+ if (err && err != UBI_IO_BITFLIPS && err != -EBADMSG) - goto out_free_buf; - - data_crc = be32_to_cpu(vid_hdr->data_crc); -@@ -390,13 +349,11 @@ - vfree(buf); - out_free_vidh: - ubi_free_vid_hdr(ubi, vh); -- ubi_assert(err < 0); - return err; - } - - /** -- * ubi_scan_add_used - add information about a physical eraseblock to the -- * scanning information. -+ * ubi_scan_add_used - add physical eraseblock to the scanning information. - * @ubi: UBI device description object - * @si: scanning information - * @pnum: the physical eraseblock number -@@ -416,7 +373,6 @@ - int bitflips) - { - int err, vol_id, lnum; -- uint32_t leb_ver; - unsigned long long sqnum; - struct ubi_scan_volume *sv; - struct ubi_scan_leb *seb; -@@ -425,13 +381,12 @@ - vol_id = be32_to_cpu(vid_hdr->vol_id); - lnum = be32_to_cpu(vid_hdr->lnum); - sqnum = be64_to_cpu(vid_hdr->sqnum); -- leb_ver = be32_to_cpu(vid_hdr->leb_ver); - -- dbg_bld("PEB %d, LEB %d:%d, EC %d, sqnum %llu, ver %u, bitflips %d", -- pnum, vol_id, lnum, ec, sqnum, leb_ver, bitflips); -+ dbg_bld("PEB %d, LEB %d:%d, EC %d, sqnum %llu, bitflips %d", -+ pnum, vol_id, lnum, ec, sqnum, bitflips); - - sv = add_volume(si, vol_id, pnum, vid_hdr); -- if (IS_ERR(sv) < 0) -+ if (IS_ERR(sv)) - return PTR_ERR(sv); - - if (si->max_sqnum < sqnum) -@@ -461,25 +416,20 @@ - */ - - dbg_bld("this LEB already exists: PEB %d, sqnum %llu, " -- "LEB ver %u, EC %d", seb->pnum, seb->sqnum, -- seb->leb_ver, seb->ec); -- -- /* -- * Make sure that the logical eraseblocks have different -- * versions. Otherwise the image is bad. 
-- */ -- if (seb->leb_ver == leb_ver && leb_ver != 0) { -- ubi_err("two LEBs with same version %u", leb_ver); -- ubi_dbg_dump_seb(seb, 0); -- ubi_dbg_dump_vid_hdr(vid_hdr); -- return -EINVAL; -- } -+ "EC %d", seb->pnum, seb->sqnum, seb->ec); - - /* - * Make sure that the logical eraseblocks have different - * sequence numbers. Otherwise the image is bad. - * -- * FIXME: remove 'sqnum != 0' check when leb_ver is removed. -+ * However, if the sequence number is zero, we assume it must -+ * be an ancient UBI image from the era when UBI did not have -+ * sequence numbers. We still can attach these images, unless -+ * there is a need to distinguish between old and new -+ * eraseblocks, in which case we'll refuse the image in -+ * 'compare_lebs()'. In other words, we attach old clean -+ * images, but refuse attaching old images with duplicated -+ * logical eraseblocks because there was an unclean reboot. - */ - if (seb->sqnum == sqnum && sqnum != 0) { - ubi_err("two LEBs with same sequence number %llu", -@@ -519,7 +469,6 @@ - seb->pnum = pnum; - seb->scrub = ((cmp_res & 2) || bitflips); - seb->sqnum = sqnum; -- seb->leb_ver = leb_ver; - - if (sv->highest_lnum == lnum) - sv->last_data_size = -@@ -556,7 +505,6 @@ - seb->lnum = lnum; - seb->sqnum = sqnum; - seb->scrub = bitflips; -- seb->leb_ver = leb_ver; - - if (sv->highest_lnum <= lnum) { - sv->highest_lnum = lnum; -@@ -570,8 +518,7 @@ - } - - /** -- * ubi_scan_find_sv - find information about a particular volume in the -- * scanning information. -+ * ubi_scan_find_sv - find volume in the scanning information. - * @si: scanning information - * @vol_id: the requested volume ID - * -@@ -600,8 +547,7 @@ - } - - /** -- * ubi_scan_find_seb - find information about a particular logical -- * eraseblock in the volume scanning information. -+ * ubi_scan_find_seb - find LEB in the volume scanning information. 
- * @sv: a pointer to the volume scanning information - * @lnum: the requested logical eraseblock - * -@@ -661,9 +607,9 @@ - * - * This function erases physical eraseblock 'pnum', and writes the erase - * counter header to it. This function should only be used on UBI device -- * initialization stages, when the EBA unit had not been yet initialized. This -- * function returns zero in case of success and a negative error code in case -- * of failure. -+ * initialization stages, when the EBA sub-system had not been yet initialized. -+ * This function returns zero in case of success and a negative error code in -+ * case of failure. - */ - int ubi_scan_erase_peb(struct ubi_device *ubi, const struct ubi_scan_info *si, - int pnum, int ec) -@@ -703,9 +649,10 @@ - * @si: scanning information - * - * This function returns a free physical eraseblock. It is supposed to be -- * called on the UBI initialization stages when the wear-leveling unit is not -- * initialized yet. This function picks a physical eraseblocks from one of the -- * lists, writes the EC header if it is needed, and removes it from the list. -+ * called on the UBI initialization stages when the wear-leveling sub-system is -+ * not initialized yet. This function picks a physical eraseblocks from one of -+ * the lists, writes the EC header if it is needed, and removes it from the -+ * list. - * - * This function returns scanning physical eraseblock information in case of - * success and an error code in case of failure. -@@ -758,8 +705,7 @@ - } - - /** -- * process_eb - read UBI headers, check them and add corresponding data -- * to the scanning information. -+ * process_eb - read, check UBI headers, and add them to scanning information. - * @ubi: UBI device description object - * @si: scanning information - * @pnum: the physical eraseblock number -@@ -767,9 +713,10 @@ - * This function returns a zero if the physical eraseblock was successfully - * handled and a negative error code in case of failure. 
- */ --static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, int pnum) -+static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, -+ int pnum) - { -- long long ec; -+ long long uninitialized_var(ec); - int err, bitflips = 0, vol_id, ec_corr = 0; - - dbg_bld("scan PEB %d", pnum); -@@ -780,8 +727,9 @@ - return err; - else if (err) { - /* -- * FIXME: this is actually duty of the I/O unit to initialize -- * this, but MTD does not provide enough information. -+ * FIXME: this is actually duty of the I/O sub-system to -+ * initialize this, but MTD does not provide enough -+ * information. - */ - si->bad_peb_count += 1; - return 0; -@@ -854,7 +802,7 @@ - } - - vol_id = be32_to_cpu(vidh->vol_id); -- if (vol_id > UBI_MAX_VOLUMES && vol_id != UBI_LAYOUT_VOL_ID) { -+ if (vol_id > UBI_MAX_VOLUMES && vol_id != UBI_LAYOUT_VOLUME_ID) { - int lnum = be32_to_cpu(vidh->lnum); - - /* Unsupported internal volume */ -@@ -897,15 +845,8 @@ - - adjust_mean_ec: - if (!ec_corr) { -- if (si->ec_sum + ec < ec) { -- commit_to_mean_value(si); -- si->ec_sum = 0; -- si->ec_count = 0; -- } else { -- si->ec_sum += ec; -- si->ec_count += 1; -- } -- -+ si->ec_sum += ec; -+ si->ec_count += 1; - if (ec > si->max_ec) - si->max_ec = ec; - if (ec < si->min_ec) -@@ -953,7 +894,7 @@ - for (pnum = 0; pnum < ubi->peb_count; pnum++) { - cond_resched(); - -- dbg_msg("process PEB %d", pnum); -+ dbg_gen("process PEB %d", pnum); - err = process_eb(ubi, si, pnum); - if (err < 0) - goto out_vidh; -@@ -961,9 +902,9 @@ - - dbg_msg("scanning is finished"); - -- /* Finish mean erase counter calculations */ -+ /* Calculate mean erase counter */ - if (si->ec_count) -- commit_to_mean_value(si); -+ si->mean_ec = div_u64(si->ec_sum, si->ec_count); - - if (si->is_empty) - ubi_msg("empty MTD device detected"); -@@ -1100,8 +1041,7 @@ - #ifdef CONFIG_MTD_UBI_DEBUG_PARANOID - - /** -- * paranoid_check_si - check if the scanning information is correct and -- * consistent. 
-+ * paranoid_check_si - check the scanning information. - * @ubi: UBI device description object - * @si: scanning information - * -@@ -1286,11 +1226,6 @@ - ubi_err("bad data_pad %d", sv->data_pad); - goto bad_vid_hdr; - } -- -- if (seb->leb_ver != be32_to_cpu(vidh->leb_ver)) { -- ubi_err("bad leb_ver %u", seb->leb_ver); -- goto bad_vid_hdr; -- } - } - - if (!last_seb) -@@ -1320,8 +1255,7 @@ - if (err < 0) { - kfree(buf); - return err; -- } -- else if (err) -+ } else if (err) - buf[pnum] = 1; - } - -diff -Nurd linux-2.6.24.orig/drivers/mtd/ubi/scan.h linux-2.6.24/drivers/mtd/ubi/scan.h ---- linux-2.6.24.orig/drivers/mtd/ubi/scan.h 2009-04-17 09:45:11.000000000 +0200 -+++ linux-2.6.24/drivers/mtd/ubi/scan.h 2009-04-17 09:49:26.000000000 +0200 -@@ -34,7 +34,6 @@ - * @u: unions RB-tree or @list links - * @u.rb: link in the per-volume RB-tree of &struct ubi_scan_leb objects - * @u.list: link in one of the eraseblock lists -- * @leb_ver: logical eraseblock version (obsolete) - * - * One object of this type is allocated for each physical eraseblock during - * scanning. 
-@@ -49,7 +48,6 @@ - struct rb_node rb; - struct list_head list; - } u; -- uint32_t leb_ver; - }; - - /** -@@ -59,16 +57,16 @@ - * @leb_count: number of logical eraseblocks in this volume - * @vol_type: volume type - * @used_ebs: number of used logical eraseblocks in this volume (only for -- * static volumes) -+ * static volumes) - * @last_data_size: amount of data in the last logical eraseblock of this -- * volume (always equivalent to the usable logical eraseblock size in case of -- * dynamic volumes) -+ * volume (always equivalent to the usable logical eraseblock -+ * size in case of dynamic volumes) - * @data_pad: how many bytes at the end of logical eraseblocks of this volume -- * are not used (due to volume alignment) -+ * are not used (due to volume alignment) - * @compat: compatibility flags of this volume - * @rb: link in the volume RB-tree - * @root: root of the RB-tree containing all the eraseblock belonging to this -- * volume (&struct ubi_scan_leb objects) -+ * volume (&struct ubi_scan_leb objects) - * - * One object of this type is allocated for each volume during scanning. - */ -@@ -92,8 +90,8 @@ - * @free: list of free physical eraseblocks - * @erase: list of physical eraseblocks which have to be erased - * @alien: list of physical eraseblocks which should not be used by UBI (e.g., -+ * those belonging to "preserve"-compatible internal volumes) - * @bad_peb_count: count of bad physical eraseblocks -- * those belonging to "preserve"-compatible internal volumes) - * @vols_found: number of volumes found during scanning - * @highest_vol_id: highest volume ID - * @alien_peb_count: count of physical eraseblocks in the @alien list -@@ -106,8 +104,8 @@ - * @ec_count: a temporary variable used when calculating @mean_ec - * - * This data structure contains the result of scanning and may be used by other -- * UBI units to build final UBI data structures, further error-recovery and so -- * on. 
-+ * UBI sub-systems to build final UBI data structures, further error-recovery -+ * and so on. - */ - struct ubi_scan_info { - struct rb_root volumes; -@@ -124,7 +122,7 @@ - int max_ec; - unsigned long long max_sqnum; - int mean_ec; -- int ec_sum; -+ uint64_t ec_sum; - int ec_count; - }; - -@@ -132,8 +130,7 @@ - struct ubi_vid_hdr; - - /* -- * ubi_scan_move_to_list - move a physical eraseblock from the volume tree to a -- * list. -+ * ubi_scan_move_to_list - move a PEB from the volume tree to a list. - * - * @sv: volume scanning information - * @seb: scanning eraseblock infprmation -diff -Nurd linux-2.6.24.orig/drivers/mtd/ubi/ubi-media.h linux-2.6.24/drivers/mtd/ubi/ubi-media.h ---- linux-2.6.24.orig/drivers/mtd/ubi/ubi-media.h 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.24/drivers/mtd/ubi/ubi-media.h 2009-04-17 09:49:26.000000000 +0200 -@@ -0,0 +1,368 @@ -+/* -+ * Copyright (c) International Business Machines Corp., 2006 -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See -+ * the GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ * -+ * Authors: Artem Bityutskiy (Битюцкий Артём) -+ * Thomas Gleixner -+ * Frank Haverkamp -+ * Oliver Lohmann -+ * Andreas Arnez -+ */ -+ -+/* -+ * This file defines the layout of UBI headers and all the other UBI on-flash -+ * data structures. 
-+ */ -+ -+#ifndef __UBI_MEDIA_H__ -+#define __UBI_MEDIA_H__ -+ -+#include <asm/byteorder.h> -+ -+/* The version of UBI images supported by this implementation */ -+#define UBI_VERSION 1 -+ -+/* The highest erase counter value supported by this implementation */ -+#define UBI_MAX_ERASECOUNTER 0x7FFFFFFF -+ -+/* The initial CRC32 value used when calculating CRC checksums */ -+#define UBI_CRC32_INIT 0xFFFFFFFFU -+ -+/* Erase counter header magic number (ASCII "UBI#") */ -+#define UBI_EC_HDR_MAGIC 0x55424923 -+/* Volume identifier header magic number (ASCII "UBI!") */ -+#define UBI_VID_HDR_MAGIC 0x55424921 -+ -+/* -+ * Volume type constants used in the volume identifier header. -+ * -+ * @UBI_VID_DYNAMIC: dynamic volume -+ * @UBI_VID_STATIC: static volume -+ */ -+enum { -+ UBI_VID_DYNAMIC = 1, -+ UBI_VID_STATIC = 2 -+}; -+ -+/* -+ * Volume flags used in the volume table record. -+ * -+ * @UBI_VTBL_AUTORESIZE_FLG: auto-resize this volume -+ * -+ * %UBI_VTBL_AUTORESIZE_FLG flag can be set only for one volume in the volume -+ * table. UBI automatically re-sizes the volume which has this flag and makes -+ * the volume to be of largest possible size. This means that if after the -+ * initialization UBI finds out that there are available physical eraseblocks -+ * present on the device, it automatically appends all of them to the volume -+ * (the physical eraseblocks reserved for bad eraseblocks handling and other -+ * reserved physical eraseblocks are not taken). So, if there is a volume with -+ * the %UBI_VTBL_AUTORESIZE_FLG flag set, the amount of available logical -+ * eraseblocks will be zero after UBI is loaded, because all of them will be -+ * reserved for this volume. Note, the %UBI_VTBL_AUTORESIZE_FLG bit is cleared -+ * after the volume had been initialized. -+ * -+ * The auto-resize feature is useful for device production purposes. 
For -+ * example, different NAND flash chips may have different amount of initial bad -+ * eraseblocks, depending of particular chip instance. Manufacturers of NAND -+ * chips usually guarantee that the amount of initial bad eraseblocks does not -+ * exceed certain percent, e.g. 2%. When one creates an UBI image which will be -+ * flashed to the end devices in production, he does not know the exact amount -+ * of good physical eraseblocks the NAND chip on the device will have, but this -+ * number is required to calculate the volume sized and put them to the volume -+ * table of the UBI image. In this case, one of the volumes (e.g., the one -+ * which will store the root file system) is marked as "auto-resizable", and -+ * UBI will adjust its size on the first boot if needed. -+ * -+ * Note, first UBI reserves some amount of physical eraseblocks for bad -+ * eraseblock handling, and then re-sizes the volume, not vice-versa. This -+ * means that the pool of reserved physical eraseblocks will always be present. -+ */ -+enum { -+ UBI_VTBL_AUTORESIZE_FLG = 0x01, -+}; -+ -+/* -+ * Compatibility constants used by internal volumes. 
-+ * -+ * @UBI_COMPAT_DELETE: delete this internal volume before anything is written -+ * to the flash -+ * @UBI_COMPAT_RO: attach this device in read-only mode -+ * @UBI_COMPAT_PRESERVE: preserve this internal volume - do not touch its -+ * physical eraseblocks, don't allow the wear-leveling -+ * sub-system to move them -+ * @UBI_COMPAT_REJECT: reject this UBI image -+ */ -+enum { -+ UBI_COMPAT_DELETE = 1, -+ UBI_COMPAT_RO = 2, -+ UBI_COMPAT_PRESERVE = 4, -+ UBI_COMPAT_REJECT = 5 -+}; -+ -+/* Sizes of UBI headers */ -+#define UBI_EC_HDR_SIZE sizeof(struct ubi_ec_hdr) -+#define UBI_VID_HDR_SIZE sizeof(struct ubi_vid_hdr) -+ -+/* Sizes of UBI headers without the ending CRC */ -+#define UBI_EC_HDR_SIZE_CRC (UBI_EC_HDR_SIZE - sizeof(__be32)) -+#define UBI_VID_HDR_SIZE_CRC (UBI_VID_HDR_SIZE - sizeof(__be32)) -+ -+/** -+ * struct ubi_ec_hdr - UBI erase counter header. -+ * @magic: erase counter header magic number (%UBI_EC_HDR_MAGIC) -+ * @version: version of UBI implementation which is supposed to accept this -+ * UBI image -+ * @padding1: reserved for future, zeroes -+ * @ec: the erase counter -+ * @vid_hdr_offset: where the VID header starts -+ * @data_offset: where the user data start -+ * @padding2: reserved for future, zeroes -+ * @hdr_crc: erase counter header CRC checksum -+ * -+ * The erase counter header takes 64 bytes and has a plenty of unused space for -+ * future usage. The unused fields are zeroed. The @version field is used to -+ * indicate the version of UBI implementation which is supposed to be able to -+ * work with this UBI image. If @version is greater then the current UBI -+ * version, the image is rejected. This may be useful in future if something -+ * is changed radically. This field is duplicated in the volume identifier -+ * header. -+ * -+ * The @vid_hdr_offset and @data_offset fields contain the offset of the the -+ * volume identifier header and user data, relative to the beginning of the -+ * physical eraseblock. 
These values have to be the same for all physical -+ * eraseblocks. -+ */ -+struct ubi_ec_hdr { -+ __be32 magic; -+ __u8 version; -+ __u8 padding1[3]; -+ __be64 ec; /* Warning: the current limit is 31-bit anyway! */ -+ __be32 vid_hdr_offset; -+ __be32 data_offset; -+ __u8 padding2[36]; -+ __be32 hdr_crc; -+} __attribute__ ((packed)); -+ -+/** -+ * struct ubi_vid_hdr - on-flash UBI volume identifier header. -+ * @magic: volume identifier header magic number (%UBI_VID_HDR_MAGIC) -+ * @version: UBI implementation version which is supposed to accept this UBI -+ * image (%UBI_VERSION) -+ * @vol_type: volume type (%UBI_VID_DYNAMIC or %UBI_VID_STATIC) -+ * @copy_flag: if this logical eraseblock was copied from another physical -+ * eraseblock (for wear-leveling reasons) -+ * @compat: compatibility of this volume (%0, %UBI_COMPAT_DELETE, -+ * %UBI_COMPAT_IGNORE, %UBI_COMPAT_PRESERVE, or %UBI_COMPAT_REJECT) -+ * @vol_id: ID of this volume -+ * @lnum: logical eraseblock number -+ * @padding1: reserved for future, zeroes -+ * @data_size: how many bytes of data this logical eraseblock contains -+ * @used_ebs: total number of used logical eraseblocks in this volume -+ * @data_pad: how many bytes at the end of this physical eraseblock are not -+ * used -+ * @data_crc: CRC checksum of the data stored in this logical eraseblock -+ * @padding2: reserved for future, zeroes -+ * @sqnum: sequence number -+ * @padding3: reserved for future, zeroes -+ * @hdr_crc: volume identifier header CRC checksum -+ * -+ * The @sqnum is the value of the global sequence counter at the time when this -+ * VID header was created. The global sequence counter is incremented each time -+ * UBI writes a new VID header to the flash, i.e. when it maps a logical -+ * eraseblock to a new physical eraseblock. The global sequence counter is an -+ * unsigned 64-bit integer and we assume it never overflows. 
The @sqnum -+ * (sequence number) is used to distinguish between older and newer versions of -+ * logical eraseblocks. -+ * -+ * There are 2 situations when there may be more then one physical eraseblock -+ * corresponding to the same logical eraseblock, i.e., having the same @vol_id -+ * and @lnum values in the volume identifier header. Suppose we have a logical -+ * eraseblock L and it is mapped to the physical eraseblock P. -+ * -+ * 1. Because UBI may erase physical eraseblocks asynchronously, the following -+ * situation is possible: L is asynchronously erased, so P is scheduled for -+ * erasure, then L is written to,i.e. mapped to another physical eraseblock P1, -+ * so P1 is written to, then an unclean reboot happens. Result - there are 2 -+ * physical eraseblocks P and P1 corresponding to the same logical eraseblock -+ * L. But P1 has greater sequence number, so UBI picks P1 when it attaches the -+ * flash. -+ * -+ * 2. From time to time UBI moves logical eraseblocks to other physical -+ * eraseblocks for wear-leveling reasons. If, for example, UBI moves L from P -+ * to P1, and an unclean reboot happens before P is physically erased, there -+ * are two physical eraseblocks P and P1 corresponding to L and UBI has to -+ * select one of them when the flash is attached. The @sqnum field says which -+ * PEB is the original (obviously P will have lower @sqnum) and the copy. But -+ * it is not enough to select the physical eraseblock with the higher sequence -+ * number, because the unclean reboot could have happen in the middle of the -+ * copying process, so the data in P is corrupted. It is also not enough to -+ * just select the physical eraseblock with lower sequence number, because the -+ * data there may be old (consider a case if more data was added to P1 after -+ * the copying). Moreover, the unclean reboot may happen when the erasure of P -+ * was just started, so it result in unstable P, which is "mostly" OK, but -+ * still has unstable bits. 
-+ * -+ * UBI uses the @copy_flag field to indicate that this logical eraseblock is a -+ * copy. UBI also calculates data CRC when the data is moved and stores it at -+ * the @data_crc field of the copy (P1). So when UBI needs to pick one physical -+ * eraseblock of two (P or P1), the @copy_flag of the newer one (P1) is -+ * examined. If it is cleared, the situation* is simple and the newer one is -+ * picked. If it is set, the data CRC of the copy (P1) is examined. If the CRC -+ * checksum is correct, this physical eraseblock is selected (P1). Otherwise -+ * the older one (P) is selected. -+ * -+ * There are 2 sorts of volumes in UBI: user volumes and internal volumes. -+ * Internal volumes are not seen from outside and are used for various internal -+ * UBI purposes. In this implementation there is only one internal volume - the -+ * layout volume. Internal volumes are the main mechanism of UBI extensions. -+ * For example, in future one may introduce a journal internal volume. Internal -+ * volumes have their own reserved range of IDs. -+ * -+ * The @compat field is only used for internal volumes and contains the "degree -+ * of their compatibility". It is always zero for user volumes. This field -+ * provides a mechanism to introduce UBI extensions and to be still compatible -+ * with older UBI binaries. For example, if someone introduced a journal in -+ * future, he would probably use %UBI_COMPAT_DELETE compatibility for the -+ * journal volume. And in this case, older UBI binaries, which know nothing -+ * about the journal volume, would just delete this volume and work perfectly -+ * fine. This is similar to what Ext2fs does when it is fed by an Ext3fs image -+ * - it just ignores the Ext3fs journal. -+ * -+ * The @data_crc field contains the CRC checksum of the contents of the logical -+ * eraseblock if this is a static volume. In case of dynamic volumes, it does -+ * not contain the CRC checksum as a rule. 
The only exception is when the -+ * data of the physical eraseblock was moved by the wear-leveling sub-system, -+ * then the wear-leveling sub-system calculates the data CRC and stores it in -+ * the @data_crc field. And of course, the @copy_flag is %in this case. -+ * -+ * The @data_size field is used only for static volumes because UBI has to know -+ * how many bytes of data are stored in this eraseblock. For dynamic volumes, -+ * this field usually contains zero. The only exception is when the data of the -+ * physical eraseblock was moved to another physical eraseblock for -+ * wear-leveling reasons. In this case, UBI calculates CRC checksum of the -+ * contents and uses both @data_crc and @data_size fields. In this case, the -+ * @data_size field contains data size. -+ * -+ * The @used_ebs field is used only for static volumes and indicates how many -+ * eraseblocks the data of the volume takes. For dynamic volumes this field is -+ * not used and always contains zero. -+ * -+ * The @data_pad is calculated when volumes are created using the alignment -+ * parameter. So, effectively, the @data_pad field reduces the size of logical -+ * eraseblocks of this volume. This is very handy when one uses block-oriented -+ * software (say, cramfs) on top of the UBI volume. -+ */ -+struct ubi_vid_hdr { -+ __be32 magic; -+ __u8 version; -+ __u8 vol_type; -+ __u8 copy_flag; -+ __u8 compat; -+ __be32 vol_id; -+ __be32 lnum; -+ __u8 padding1[4]; -+ __be32 data_size; -+ __be32 used_ebs; -+ __be32 data_pad; -+ __be32 data_crc; -+ __u8 padding2[4]; -+ __be64 sqnum; -+ __u8 padding3[12]; -+ __be32 hdr_crc; -+} __attribute__ ((packed)); -+ -+/* Internal UBI volumes count */ -+#define UBI_INT_VOL_COUNT 1 -+ -+/* -+ * Starting ID of internal volumes. There is reserved room for 4096 internal -+ * volumes. 
-+ */ -+#define UBI_INTERNAL_VOL_START (0x7FFFFFFF - 4096) -+ -+/* The layout volume contains the volume table */ -+ -+#define UBI_LAYOUT_VOLUME_ID UBI_INTERNAL_VOL_START -+#define UBI_LAYOUT_VOLUME_TYPE UBI_VID_DYNAMIC -+#define UBI_LAYOUT_VOLUME_ALIGN 1 -+#define UBI_LAYOUT_VOLUME_EBS 2 -+#define UBI_LAYOUT_VOLUME_NAME "layout volume" -+#define UBI_LAYOUT_VOLUME_COMPAT UBI_COMPAT_REJECT -+ -+/* The maximum number of volumes per one UBI device */ -+#define UBI_MAX_VOLUMES 128 -+ -+/* The maximum volume name length */ -+#define UBI_VOL_NAME_MAX 127 -+ -+/* Size of the volume table record */ -+#define UBI_VTBL_RECORD_SIZE sizeof(struct ubi_vtbl_record) -+ -+/* Size of the volume table record without the ending CRC */ -+#define UBI_VTBL_RECORD_SIZE_CRC (UBI_VTBL_RECORD_SIZE - sizeof(__be32)) -+ -+/** -+ * struct ubi_vtbl_record - a record in the volume table. -+ * @reserved_pebs: how many physical eraseblocks are reserved for this volume -+ * @alignment: volume alignment -+ * @data_pad: how many bytes are unused at the end of the each physical -+ * eraseblock to satisfy the requested alignment -+ * @vol_type: volume type (%UBI_DYNAMIC_VOLUME or %UBI_STATIC_VOLUME) -+ * @upd_marker: if volume update was started but not finished -+ * @name_len: volume name length -+ * @name: the volume name -+ * @flags: volume flags (%UBI_VTBL_AUTORESIZE_FLG) -+ * @padding: reserved, zeroes -+ * @crc: a CRC32 checksum of the record -+ * -+ * The volume table records are stored in the volume table, which is stored in -+ * the layout volume. The layout volume consists of 2 logical eraseblock, each -+ * of which contains a copy of the volume table (i.e., the volume table is -+ * duplicated). The volume table is an array of &struct ubi_vtbl_record -+ * objects indexed by the volume ID. -+ * -+ * If the size of the logical eraseblock is large enough to fit -+ * %UBI_MAX_VOLUMES records, the volume table contains %UBI_MAX_VOLUMES -+ * records. 
Otherwise, it contains as many records as it can fit (i.e., size of -+ * logical eraseblock divided by sizeof(struct ubi_vtbl_record)). -+ * -+ * The @upd_marker flag is used to implement volume update. It is set to %1 -+ * before update and set to %0 after the update. So if the update operation was -+ * interrupted, UBI knows that the volume is corrupted. -+ * -+ * The @alignment field is specified when the volume is created and cannot be -+ * later changed. It may be useful, for example, when a block-oriented file -+ * system works on top of UBI. The @data_pad field is calculated using the -+ * logical eraseblock size and @alignment. The alignment must be multiple to the -+ * minimal flash I/O unit. If @alignment is 1, all the available space of -+ * the physical eraseblocks is used. -+ * -+ * Empty records contain all zeroes and the CRC checksum of those zeroes. -+ */ -+struct ubi_vtbl_record { -+ __be32 reserved_pebs; -+ __be32 alignment; -+ __be32 data_pad; -+ __u8 vol_type; -+ __u8 upd_marker; -+ __be16 name_len; -+ __u8 name[UBI_VOL_NAME_MAX+1]; -+ __u8 flags; -+ __u8 padding[23]; -+ __be32 crc; -+} __attribute__ ((packed)); -+ -+#endif /* !__UBI_MEDIA_H__ */ -diff -Nurd linux-2.6.24.orig/drivers/mtd/ubi/ubi.h linux-2.6.24/drivers/mtd/ubi/ubi.h ---- linux-2.6.24.orig/drivers/mtd/ubi/ubi.h 2009-04-17 09:45:11.000000000 +0200 -+++ linux-2.6.24/drivers/mtd/ubi/ubi.h 2009-04-17 09:49:26.000000000 +0200 -@@ -37,10 +37,9 @@ - #include <linux/string.h> - #include <linux/vmalloc.h> - #include <linux/mtd/mtd.h> -- --#include <mtd/ubi-header.h> - #include <linux/mtd/ubi.h> - -+#include "ubi-media.h" - #include "scan.h" - #include "debug.h" - -@@ -75,15 +74,22 @@ - #define UBI_IO_RETRIES 3 - - /* -- * Error codes returned by the I/O unit. -+ * Length of the protection queue. The length is effectively equivalent to the -+ * number of (global) erase cycles PEBs are protected from the wear-leveling -+ * worker. 
-+ */ -+#define UBI_PROT_QUEUE_LEN 10 -+ -+/* -+ * Error codes returned by the I/O sub-system. - * - * UBI_IO_PEB_EMPTY: the physical eraseblock is empty, i.e. it contains only -- * 0xFF bytes -+ * %0xFF bytes - * UBI_IO_PEB_FREE: the physical eraseblock is free, i.e. it contains only a -- * valid erase counter header, and the rest are %0xFF bytes -+ * valid erase counter header, and the rest are %0xFF bytes - * UBI_IO_BAD_EC_HDR: the erase counter header is corrupted (bad magic or CRC) - * UBI_IO_BAD_VID_HDR: the volume identifier header is corrupted (bad magic or -- * CRC) -+ * CRC) - * UBI_IO_BITFLIPS: bit-flips were detected and corrected - */ - enum { -@@ -94,8 +100,68 @@ - UBI_IO_BITFLIPS - }; - --extern int ubi_devices_cnt; --extern struct ubi_device *ubi_devices[]; -+/** -+ * struct ubi_wl_entry - wear-leveling entry. -+ * @u.rb: link in the corresponding (free/used) RB-tree -+ * @u.list: link in the protection queue -+ * @ec: erase counter -+ * @pnum: physical eraseblock number -+ * -+ * This data structure is used in the WL sub-system. Each physical eraseblock -+ * has a corresponding &struct wl_entry object which may be kept in different -+ * RB-trees. See WL sub-system for details. -+ */ -+struct ubi_wl_entry { -+ union { -+ struct rb_node rb; -+ struct list_head list; -+ } u; -+ int ec; -+ int pnum; -+}; -+ -+/** -+ * struct ubi_ltree_entry - an entry in the lock tree. -+ * @rb: links RB-tree nodes -+ * @vol_id: volume ID of the locked logical eraseblock -+ * @lnum: locked logical eraseblock number -+ * @users: how many tasks are using this logical eraseblock or wait for it -+ * @mutex: read/write mutex to implement read/write access serialization to -+ * the (@vol_id, @lnum) logical eraseblock -+ * -+ * This data structure is used in the EBA sub-system to implement per-LEB -+ * locking. When a logical eraseblock is being locked - corresponding -+ * &struct ubi_ltree_entry object is inserted to the lock tree (@ubi->ltree). 
-+ * See EBA sub-system for details. -+ */ -+struct ubi_ltree_entry { -+ struct rb_node rb; -+ int vol_id; -+ int lnum; -+ int users; -+ struct rw_semaphore mutex; -+}; -+ -+/** -+ * struct ubi_rename_entry - volume re-name description data structure. -+ * @new_name_len: new volume name length -+ * @new_name: new volume name -+ * @remove: if not zero, this volume should be removed, not re-named -+ * @desc: descriptor of the volume -+ * @list: links re-name entries into a list -+ * -+ * This data structure is utilized in the multiple volume re-name code. Namely, -+ * UBI first creates a list of &struct ubi_rename_entry objects from the -+ * &struct ubi_rnvol_req request object, and then utilizes this list to do all -+ * the job. -+ */ -+struct ubi_rename_entry { -+ int new_name_len; -+ char new_name[UBI_VOL_NAME_MAX + 1]; -+ int remove; -+ struct ubi_volume_desc *desc; -+ struct list_head list; -+}; - - struct ubi_volume_desc; - -@@ -105,11 +171,10 @@ - * @cdev: character device object to create character device - * @ubi: reference to the UBI device description object - * @vol_id: volume ID -+ * @ref_count: volume reference count - * @readers: number of users holding this volume in read-only mode - * @writers: number of users holding this volume in read-write mode - * @exclusive: whether somebody holds this volume in exclusive mode -- * @removed: if the volume was removed -- * @checked: if this static volume was checked - * - * @reserved_pebs: how many physical eraseblocks are reserved for this volume - * @vol_type: volume type (%UBI_DYNAMIC_VOLUME or %UBI_STATIC_VOLUME) -@@ -117,21 +182,30 @@ - * @used_ebs: how many logical eraseblocks in this volume contain data - * @last_eb_bytes: how many bytes are stored in the last logical eraseblock - * @used_bytes: how many bytes of data this volume contains -- * @upd_marker: non-zero if the update marker is set for this volume -- * @corrupted: non-zero if the volume is corrupted (static volumes only) - * @alignment: volume 
alignment - * @data_pad: how many bytes are not used at the end of physical eraseblocks to -- * satisfy the requested alignment -+ * satisfy the requested alignment - * @name_len: volume name length - * @name: volume name - * -- * @updating: whether the volume is being updated - * @upd_ebs: how many eraseblocks are expected to be updated -- * @upd_bytes: how many bytes are expected to be received -- * @upd_received: how many update bytes were already received -- * @upd_buf: update buffer which is used to collect update data -+ * @ch_lnum: LEB number which is being changing by the atomic LEB change -+ * operation -+ * @ch_dtype: data persistency type which is being changing by the atomic LEB -+ * change operation -+ * @upd_bytes: how many bytes are expected to be received for volume update or -+ * atomic LEB change -+ * @upd_received: how many bytes were already received for volume update or -+ * atomic LEB change -+ * @upd_buf: update buffer which is used to collect update data or data for -+ * atomic LEB change - * - * @eba_tbl: EBA table of this volume (LEB->PEB mapping) -+ * @checked: %1 if this static volume was checked -+ * @corrupted: %1 if the volume is corrupted (static volumes only) -+ * @upd_marker: %1 if the update marker is set for this volume -+ * @updating: %1 if the volume is being updated -+ * @changing_leb: %1 if the atomic LEB change ioctl command is in progress - * - * @gluebi_desc: gluebi UBI volume descriptor - * @gluebi_refcount: reference count of the gluebi MTD device -@@ -150,11 +224,10 @@ - struct cdev cdev; - struct ubi_device *ubi; - int vol_id; -+ int ref_count; - int readers; - int writers; - int exclusive; -- int removed; -- int checked; - - int reserved_pebs; - int vol_type; -@@ -162,23 +235,31 @@ - int used_ebs; - int last_eb_bytes; - long long used_bytes; -- int upd_marker; -- int corrupted; - int alignment; - int data_pad; - int name_len; -- char name[UBI_VOL_NAME_MAX+1]; -+ char name[UBI_VOL_NAME_MAX + 1]; - -- int updating; - 
int upd_ebs; -+ int ch_lnum; -+ int ch_dtype; - long long upd_bytes; - long long upd_received; - void *upd_buf; - - int *eba_tbl; -+ unsigned int checked:1; -+ unsigned int corrupted:1; -+ unsigned int upd_marker:1; -+ unsigned int updating:1; -+ unsigned int changing_leb:1; - - #ifdef CONFIG_MTD_UBI_GLUEBI -- /* Gluebi-related stuff may be compiled out */ -+ /* -+ * Gluebi-related stuff may be compiled out. -+ * Note: this should not be built into UBI but should be a separate -+ * ubimtd driver which works on top of UBI and emulates MTD devices. -+ */ - struct ubi_volume_desc *gluebi_desc; - int gluebi_refcount; - struct mtd_info gluebi_mtd; -@@ -186,8 +267,7 @@ - }; - - /** -- * struct ubi_volume_desc - descriptor of the UBI volume returned when it is -- * opened. -+ * struct ubi_volume_desc - UBI volume descriptor returned when it is opened. - * @vol: reference to the corresponding volume description object - * @mode: open mode (%UBI_READONLY, %UBI_READWRITE, or %UBI_EXCLUSIVE) - */ -@@ -200,28 +280,31 @@ - - /** - * struct ubi_device - UBI device description structure -- * @dev: class device object to use the the Linux device model -+ * @dev: UBI device object to use the the Linux device model - * @cdev: character device object to create character device - * @ubi_num: UBI device number - * @ubi_name: UBI device name -- * @major: character device major number - * @vol_count: number of volumes in this UBI device - * @volumes: volumes of this UBI device - * @volumes_lock: protects @volumes, @rsvd_pebs, @avail_pebs, beb_rsvd_pebs, -- * @beb_rsvd_level, @bad_peb_count, @good_peb_count, @vol_count, @vol->readers, -- * @vol->writers, @vol->exclusive, @vol->removed, @vol->mapping and -- * @vol->eba_tbl. -+ * @beb_rsvd_level, @bad_peb_count, @good_peb_count, @vol_count, -+ * @vol->readers, @vol->writers, @vol->exclusive, -+ * @vol->ref_count, @vol->mapping and @vol->eba_tbl. 
-+ * @ref_count: count of references on the UBI device - * - * @rsvd_pebs: count of reserved physical eraseblocks - * @avail_pebs: count of available physical eraseblocks - * @beb_rsvd_pebs: how many physical eraseblocks are reserved for bad PEB -- * handling -+ * handling - * @beb_rsvd_level: normal level of PEBs reserved for bad PEB handling - * -+ * @autoresize_vol_id: ID of the volume which has to be auto-resized at the end -+ * of UBI initialization - * @vtbl_slots: how many slots are available in the volume table - * @vtbl_size: size of the volume table in bytes - * @vtbl: in-RAM volume table copy -- * @vtbl_mutex: protects on-flash volume table -+ * @volumes_mutex: protects on-flash volume table and serializes volume -+ * changes, like creation, deletion, update, re-size and re-name - * - * @max_ec: current highest erase counter value - * @mean_ec: current mean erase counter value -@@ -234,19 +317,19 @@ - * @used: RB-tree of used physical eraseblocks - * @free: RB-tree of free physical eraseblocks - * @scrub: RB-tree of physical eraseblocks which need scrubbing -- * @prot: protection trees -- * @prot.pnum: protection tree indexed by physical eraseblock numbers -- * @prot.aec: protection tree indexed by absolute erase counter value -- * @wl_lock: protects the @used, @free, @prot, @lookuptbl, @abs_ec, @move_from, -- * @move_to, @move_to_put @erase_pending, @wl_scheduled, and @works -- * fields -+ * @pq: protection queue (contain physical eraseblocks which are temporarily -+ * protected from the wear-leveling worker) -+ * @pq_head: protection queue head -+ * @wl_lock: protects the @used, @free, @pq, @pq_head, @lookuptbl, @move_from, -+ * @move_to, @move_to_put @erase_pending, @wl_scheduled and @works -+ * fields -+ * @move_mutex: serializes eraseblock moves -+ * @work_sem: synchronizes the WL worker with use tasks - * @wl_scheduled: non-zero if the wear-leveling was scheduled - * @lookuptbl: a table to quickly find a &struct ubi_wl_entry object for any -- * 
physical eraseblock -- * @abs_ec: absolute erase counter -+ * physical eraseblock - * @move_from: physical eraseblock from where the data is being moved - * @move_to: physical eraseblock where the data is being moved to -- * @move_from_put: if the "from" PEB was put - * @move_to_put: if the "to" PEB was put - * @works: list of pending works - * @works_count: count of pending works -@@ -264,68 +347,70 @@ - * @ro_mode: if the UBI device is in read-only mode - * @leb_size: logical eraseblock size - * @leb_start: starting offset of logical eraseblocks within physical -- * eraseblocks -+ * eraseblocks - * @ec_hdr_alsize: size of the EC header aligned to @hdrs_min_io_size - * @vid_hdr_alsize: size of the VID header aligned to @hdrs_min_io_size - * @vid_hdr_offset: starting offset of the volume identifier header (might be -- * unaligned) -+ * unaligned) - * @vid_hdr_aloffset: starting offset of the VID header aligned to - * @hdrs_min_io_size - * @vid_hdr_shift: contains @vid_hdr_offset - @vid_hdr_aloffset - * @bad_allowed: whether the MTD device admits of bad physical eraseblocks or -- * not -+ * not - * @mtd: MTD device descriptor - * - * @peb_buf1: a buffer of PEB size used for different purposes - * @peb_buf2: another buffer of PEB size used for different purposes -- * @buf_mutex: proptects @peb_buf1 and @peb_buf2 -- * @dbg_peb_buf: buffer of PEB size used for debugging -- * @dbg_buf_mutex: proptects @dbg_peb_buf -+ * @buf_mutex: protects @peb_buf1 and @peb_buf2 -+ * @ckvol_mutex: serializes static volume checking when opening -+ * @mult_mutex: serializes operations on multiple volumes, like re-naming -+ * @dbg_peb_buf: buffer of PEB size used for debugging -+ * @dbg_buf_mutex: protects @dbg_peb_buf - */ - struct ubi_device { - struct cdev cdev; - struct device dev; - int ubi_num; - char ubi_name[sizeof(UBI_NAME_STR)+5]; -- int major; - int vol_count; - struct ubi_volume *volumes[UBI_MAX_VOLUMES+UBI_INT_VOL_COUNT]; - spinlock_t volumes_lock; -+ int ref_count; - - int 
rsvd_pebs; - int avail_pebs; - int beb_rsvd_pebs; - int beb_rsvd_level; - -+ int autoresize_vol_id; - int vtbl_slots; - int vtbl_size; - struct ubi_vtbl_record *vtbl; -- struct mutex vtbl_mutex; -+ struct mutex volumes_mutex; - - int max_ec; -+ /* Note, mean_ec is not updated run-time - should be fixed */ - int mean_ec; - -- /* EBA unit's stuff */ -+ /* EBA sub-system's stuff */ - unsigned long long global_sqnum; - spinlock_t ltree_lock; - struct rb_root ltree; - struct mutex alc_mutex; - -- /* Wear-leveling unit's stuff */ -+ /* Wear-leveling sub-system's stuff */ - struct rb_root used; - struct rb_root free; - struct rb_root scrub; -- struct { -- struct rb_root pnum; -- struct rb_root aec; -- } prot; -+ struct list_head pq[UBI_PROT_QUEUE_LEN]; -+ int pq_head; - spinlock_t wl_lock; -+ struct mutex move_mutex; -+ struct rw_semaphore work_sem; - int wl_scheduled; - struct ubi_wl_entry **lookuptbl; -- unsigned long long abs_ec; - struct ubi_wl_entry *move_from; - struct ubi_wl_entry *move_to; -- int move_from_put; - int move_to_put; - struct list_head works; - int works_count; -@@ -333,7 +418,7 @@ - int thread_enabled; - char bgt_name[sizeof(UBI_BGT_NAME_PATTERN)+2]; - -- /* I/O unit's stuff */ -+ /* I/O sub-system's stuff */ - long long flash_size; - int peb_count; - int peb_size; -@@ -355,35 +440,49 @@ - void *peb_buf1; - void *peb_buf2; - struct mutex buf_mutex; -+ struct mutex ckvol_mutex; -+ struct mutex mult_mutex; - #ifdef CONFIG_MTD_UBI_DEBUG - void *dbg_peb_buf; - struct mutex dbg_buf_mutex; - #endif - }; - --extern struct file_operations ubi_cdev_operations; --extern struct file_operations ubi_vol_cdev_operations; -+extern struct kmem_cache *ubi_wl_entry_slab; -+extern const struct file_operations ubi_ctrl_cdev_operations; -+extern const struct file_operations ubi_cdev_operations; -+extern const struct file_operations ubi_vol_cdev_operations; - extern struct class *ubi_class; -+extern struct mutex ubi_devices_mutex; - - /* vtbl.c */ - int 
ubi_change_vtbl_record(struct ubi_device *ubi, int idx, - struct ubi_vtbl_record *vtbl_rec); -+int ubi_vtbl_rename_volumes(struct ubi_device *ubi, -+ struct list_head *rename_list); - int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_scan_info *si); - - /* vmt.c */ - int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req); --int ubi_remove_volume(struct ubi_volume_desc *desc); -+int ubi_remove_volume(struct ubi_volume_desc *desc, int no_vtbl); - int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs); --int ubi_add_volume(struct ubi_device *ubi, int vol_id); --void ubi_free_volume(struct ubi_device *ubi, int vol_id); -+int ubi_rename_volumes(struct ubi_device *ubi, struct list_head *rename_list); -+int ubi_add_volume(struct ubi_device *ubi, struct ubi_volume *vol); -+void ubi_free_volume(struct ubi_device *ubi, struct ubi_volume *vol); - - /* upd.c */ --int ubi_start_update(struct ubi_device *ubi, int vol_id, long long bytes); --int ubi_more_update_data(struct ubi_device *ubi, int vol_id, -+int ubi_start_update(struct ubi_device *ubi, struct ubi_volume *vol, -+ long long bytes); -+int ubi_more_update_data(struct ubi_device *ubi, struct ubi_volume *vol, - const void __user *buf, int count); -+int ubi_start_leb_change(struct ubi_device *ubi, struct ubi_volume *vol, -+ const struct ubi_leb_change_req *req); -+int ubi_more_leb_change_data(struct ubi_device *ubi, struct ubi_volume *vol, -+ const void __user *buf, int count); - - /* misc.c */ --int ubi_calc_data_len(const struct ubi_device *ubi, const void *buf, int length); -+int ubi_calc_data_len(const struct ubi_device *ubi, const void *buf, -+ int length); - int ubi_check_volume(struct ubi_device *ubi, int vol_id); - void ubi_calculate_reserved(struct ubi_device *ubi); - -@@ -399,20 +498,20 @@ - #endif - - /* eba.c */ --int ubi_eba_unmap_leb(struct ubi_device *ubi, int vol_id, int lnum); --int ubi_eba_read_leb(struct ubi_device *ubi, int vol_id, int lnum, void *buf, -- int 
offset, int len, int check); --int ubi_eba_write_leb(struct ubi_device *ubi, int vol_id, int lnum, -+int ubi_eba_unmap_leb(struct ubi_device *ubi, struct ubi_volume *vol, -+ int lnum); -+int ubi_eba_read_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum, -+ void *buf, int offset, int len, int check); -+int ubi_eba_write_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum, - const void *buf, int offset, int len, int dtype); --int ubi_eba_write_leb_st(struct ubi_device *ubi, int vol_id, int lnum, -- const void *buf, int len, int dtype, -+int ubi_eba_write_leb_st(struct ubi_device *ubi, struct ubi_volume *vol, -+ int lnum, const void *buf, int len, int dtype, - int used_ebs); --int ubi_eba_atomic_leb_change(struct ubi_device *ubi, int vol_id, int lnum, -- const void *buf, int len, int dtype); -+int ubi_eba_atomic_leb_change(struct ubi_device *ubi, struct ubi_volume *vol, -+ int lnum, const void *buf, int len, int dtype); - int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, - struct ubi_vid_hdr *vid_hdr); - int ubi_eba_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si); --void ubi_eba_close(const struct ubi_device *ubi); - - /* wl.c */ - int ubi_wl_get_peb(struct ubi_device *ubi, int dtype); -@@ -421,6 +520,7 @@ - int ubi_wl_scrub_peb(struct ubi_device *ubi, int pnum); - int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si); - void ubi_wl_close(struct ubi_device *ubi); -+int ubi_thread(void *u); - - /* io.c */ - int ubi_io_read(const struct ubi_device *ubi, void *buf, int pnum, int offset, -@@ -439,6 +539,14 @@ - int ubi_io_write_vid_hdr(struct ubi_device *ubi, int pnum, - struct ubi_vid_hdr *vid_hdr); - -+/* build.c */ -+int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset); -+int ubi_detach_mtd_dev(int ubi_num, int anyway); -+struct ubi_device *ubi_get_device(int ubi_num); -+void ubi_put_device(struct ubi_device *ubi); -+struct ubi_device *ubi_get_by_major(int major); -+int 
ubi_major2num(int major); -+ - /* - * ubi_rb_for_each_entry - walk an RB-tree. - * @rb: a pointer to type 'struct rb_node' to to use as a loop counter -@@ -523,8 +631,10 @@ - */ - static inline void ubi_ro_mode(struct ubi_device *ubi) - { -- ubi->ro_mode = 1; -- ubi_warn("switch to read-only mode"); -+ if (!ubi->ro_mode) { -+ ubi->ro_mode = 1; -+ ubi_warn("switch to read-only mode"); -+ } - } - - /** -diff -Nurd linux-2.6.24.orig/drivers/mtd/ubi/upd.c linux-2.6.24/drivers/mtd/ubi/upd.c ---- linux-2.6.24.orig/drivers/mtd/ubi/upd.c 2009-04-17 09:45:11.000000000 +0200 -+++ linux-2.6.24/drivers/mtd/ubi/upd.c 2009-04-17 09:49:26.000000000 +0200 -@@ -22,7 +22,8 @@ - */ - - /* -- * This file contains implementation of the volume update functionality. -+ * This file contains implementation of the volume update and atomic LEB change -+ * functionality. - * - * The update operation is based on the per-volume update marker which is - * stored in the volume table. The update marker is set before the update -@@ -38,36 +39,37 @@ - */ - - #include <linux/err.h> --#include <asm/uaccess.h> --#include <asm/div64.h> -+#include <linux/uaccess.h> - #include "ubi.h" - - /** - * set_update_marker - set update marker. - * @ubi: UBI device description object -- * @vol_id: volume ID -+ * @vol: volume description object - * -- * This function sets the update marker flag for volume @vol_id. Returns zero -+ * This function sets the update marker flag for volume @vol. Returns zero - * in case of success and a negative error code in case of failure. 
- */ --static int set_update_marker(struct ubi_device *ubi, int vol_id) -+static int set_update_marker(struct ubi_device *ubi, struct ubi_volume *vol) - { - int err; - struct ubi_vtbl_record vtbl_rec; -- struct ubi_volume *vol = ubi->volumes[vol_id]; - -- dbg_msg("set update marker for volume %d", vol_id); -+ dbg_gen("set update marker for volume %d", vol->vol_id); - - if (vol->upd_marker) { -- ubi_assert(ubi->vtbl[vol_id].upd_marker); -- dbg_msg("already set"); -+ ubi_assert(ubi->vtbl[vol->vol_id].upd_marker); -+ dbg_gen("already set"); - return 0; - } - -- memcpy(&vtbl_rec, &ubi->vtbl[vol_id], sizeof(struct ubi_vtbl_record)); -+ memcpy(&vtbl_rec, &ubi->vtbl[vol->vol_id], -+ sizeof(struct ubi_vtbl_record)); - vtbl_rec.upd_marker = 1; - -- err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec); -+ mutex_lock(&ubi->volumes_mutex); -+ err = ubi_change_vtbl_record(ubi, vol->vol_id, &vtbl_rec); -+ mutex_unlock(&ubi->volumes_mutex); - vol->upd_marker = 1; - return err; - } -@@ -75,38 +77,40 @@ - /** - * clear_update_marker - clear update marker. - * @ubi: UBI device description object -- * @vol_id: volume ID -+ * @vol: volume description object - * @bytes: new data size in bytes - * -- * This function clears the update marker for volume @vol_id, sets new volume -+ * This function clears the update marker for volume @vol, sets new volume - * data size and clears the "corrupted" flag (static volumes only). Returns - * zero in case of success and a negative error code in case of failure. 
- */ --static int clear_update_marker(struct ubi_device *ubi, int vol_id, long long bytes) -+static int clear_update_marker(struct ubi_device *ubi, struct ubi_volume *vol, -+ long long bytes) - { - int err; -- uint64_t tmp; - struct ubi_vtbl_record vtbl_rec; -- struct ubi_volume *vol = ubi->volumes[vol_id]; - -- dbg_msg("clear update marker for volume %d", vol_id); -+ dbg_gen("clear update marker for volume %d", vol->vol_id); - -- memcpy(&vtbl_rec, &ubi->vtbl[vol_id], sizeof(struct ubi_vtbl_record)); -+ memcpy(&vtbl_rec, &ubi->vtbl[vol->vol_id], -+ sizeof(struct ubi_vtbl_record)); - ubi_assert(vol->upd_marker && vtbl_rec.upd_marker); - vtbl_rec.upd_marker = 0; - - if (vol->vol_type == UBI_STATIC_VOLUME) { - vol->corrupted = 0; -- vol->used_bytes = tmp = bytes; -- vol->last_eb_bytes = do_div(tmp, vol->usable_leb_size); -- vol->used_ebs = tmp; -+ vol->used_bytes = bytes; -+ vol->used_ebs = div_u64_rem(bytes, vol->usable_leb_size, -+ &vol->last_eb_bytes); - if (vol->last_eb_bytes) - vol->used_ebs += 1; - else - vol->last_eb_bytes = vol->usable_leb_size; - } - -- err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec); -+ mutex_lock(&ubi->volumes_mutex); -+ err = ubi_change_vtbl_record(ubi, vol->vol_id, &vtbl_rec); -+ mutex_unlock(&ubi->volumes_mutex); - vol->upd_marker = 0; - return err; - } -@@ -114,35 +118,35 @@ - /** - * ubi_start_update - start volume update. - * @ubi: UBI device description object -- * @vol_id: volume ID -+ * @vol: volume description object - * @bytes: update bytes - * - * This function starts volume update operation. If @bytes is zero, the volume - * is just wiped out. Returns zero in case of success and a negative error code - * in case of failure. 
- */ --int ubi_start_update(struct ubi_device *ubi, int vol_id, long long bytes) -+int ubi_start_update(struct ubi_device *ubi, struct ubi_volume *vol, -+ long long bytes) - { - int i, err; -- uint64_t tmp; -- struct ubi_volume *vol = ubi->volumes[vol_id]; - -- dbg_msg("start update of volume %d, %llu bytes", vol_id, bytes); -+ dbg_gen("start update of volume %d, %llu bytes", vol->vol_id, bytes); -+ ubi_assert(!vol->updating && !vol->changing_leb); - vol->updating = 1; - -- err = set_update_marker(ubi, vol_id); -+ err = set_update_marker(ubi, vol); - if (err) - return err; - - /* Before updating - wipe out the volume */ - for (i = 0; i < vol->reserved_pebs; i++) { -- err = ubi_eba_unmap_leb(ubi, vol_id, i); -+ err = ubi_eba_unmap_leb(ubi, vol, i); - if (err) - return err; - } - - if (bytes == 0) { -- err = clear_update_marker(ubi, vol_id, 0); -+ err = clear_update_marker(ubi, vol, 0); - if (err) - return err; - err = ubi_wl_flush(ubi); -@@ -154,18 +158,50 @@ - if (!vol->upd_buf) - return -ENOMEM; - -- tmp = bytes; -- vol->upd_ebs = !!do_div(tmp, vol->usable_leb_size); -- vol->upd_ebs += tmp; -+ vol->upd_ebs = div_u64(bytes + vol->usable_leb_size - 1, -+ vol->usable_leb_size); - vol->upd_bytes = bytes; - vol->upd_received = 0; - return 0; - } - - /** -+ * ubi_start_leb_change - start atomic LEB change. -+ * @ubi: UBI device description object -+ * @vol: volume description object -+ * @req: operation request -+ * -+ * This function starts atomic LEB change operation. Returns zero in case of -+ * success and a negative error code in case of failure. 
-+ */ -+int ubi_start_leb_change(struct ubi_device *ubi, struct ubi_volume *vol, -+ const struct ubi_leb_change_req *req) -+{ -+ ubi_assert(!vol->updating && !vol->changing_leb); -+ -+ dbg_gen("start changing LEB %d:%d, %u bytes", -+ vol->vol_id, req->lnum, req->bytes); -+ if (req->bytes == 0) -+ return ubi_eba_atomic_leb_change(ubi, vol, req->lnum, NULL, 0, -+ req->dtype); -+ -+ vol->upd_bytes = req->bytes; -+ vol->upd_received = 0; -+ vol->changing_leb = 1; -+ vol->ch_lnum = req->lnum; -+ vol->ch_dtype = req->dtype; -+ -+ vol->upd_buf = vmalloc(req->bytes); -+ if (!vol->upd_buf) -+ return -ENOMEM; -+ -+ return 0; -+} -+ -+/** - * write_leb - write update data. - * @ubi: UBI device description object -- * @vol_id: volume ID -+ * @vol: volume description object - * @lnum: logical eraseblock number - * @buf: data to write - * @len: data size -@@ -191,25 +227,22 @@ - * This function returns zero in case of success and a negative error code in - * case of failure. - */ --static int write_leb(struct ubi_device *ubi, int vol_id, int lnum, void *buf, -- int len, int used_ebs) -+static int write_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum, -+ void *buf, int len, int used_ebs) - { -- int err, l; -- struct ubi_volume *vol = ubi->volumes[vol_id]; -+ int err; - - if (vol->vol_type == UBI_DYNAMIC_VOLUME) { -- l = ALIGN(len, ubi->min_io_size); -- memset(buf + len, 0xFF, l - len); -+ int l = ALIGN(len, ubi->min_io_size); - -- l = ubi_calc_data_len(ubi, buf, l); -- if (l == 0) { -- dbg_msg("all %d bytes contain 0xFF - skip", len); -+ memset(buf + len, 0xFF, l - len); -+ len = ubi_calc_data_len(ubi, buf, l); -+ if (len == 0) { -+ dbg_gen("all %d bytes contain 0xFF - skip", len); - return 0; - } -- if (len != l) -- dbg_msg("skip last %d bytes (0xFF)", len - l); - -- err = ubi_eba_write_leb(ubi, vol_id, lnum, buf, 0, l, -+ err = ubi_eba_write_leb(ubi, vol, lnum, buf, 0, len, - UBI_UNKNOWN); - } else { - /* -@@ -222,7 +255,7 @@ - * contain zeros, not random trash. 
- */ - memset(buf + len, 0, vol->usable_leb_size - len); -- err = ubi_eba_write_leb_st(ubi, vol_id, lnum, buf, len, -+ err = ubi_eba_write_leb_st(ubi, vol, lnum, buf, len, - UBI_UNKNOWN, used_ebs); - } - -@@ -231,33 +264,29 @@ - - /** - * ubi_more_update_data - write more update data. -+ * @ubi: UBI device description object - * @vol: volume description object - * @buf: write data (user-space memory buffer) - * @count: how much bytes to write - * - * This function writes more data to the volume which is being updated. It may -- * be called arbitrary number of times until all of the update data arrive. -- * This function returns %0 in case of success, number of bytes written during -- * the last call if the whole volume update was successfully finished, and a -+ * be called arbitrary number of times until all the update data arriveis. This -+ * function returns %0 in case of success, number of bytes written during the -+ * last call if the whole volume update has been successfully finished, and a - * negative error code in case of failure. - */ --int ubi_more_update_data(struct ubi_device *ubi, int vol_id, -+int ubi_more_update_data(struct ubi_device *ubi, struct ubi_volume *vol, - const void __user *buf, int count) - { -- uint64_t tmp; -- struct ubi_volume *vol = ubi->volumes[vol_id]; - int lnum, offs, err = 0, len, to_write = count; - -- dbg_msg("write %d of %lld bytes, %lld already passed", -+ dbg_gen("write %d of %lld bytes, %lld already passed", - count, vol->upd_bytes, vol->upd_received); - - if (ubi->ro_mode) - return -EROFS; - -- tmp = vol->upd_received; -- offs = do_div(tmp, vol->usable_leb_size); -- lnum = tmp; -- -+ lnum = div_u64_rem(vol->upd_received, vol->usable_leb_size, &offs); - if (vol->upd_received + count > vol->upd_bytes) - to_write = count = vol->upd_bytes - vol->upd_received; - -@@ -290,8 +319,8 @@ - * is the last chunk, it's time to flush the buffer. 
- */ - ubi_assert(flush_len <= vol->usable_leb_size); -- err = write_leb(ubi, vol_id, lnum, vol->upd_buf, -- flush_len, vol->upd_ebs); -+ err = write_leb(ubi, vol, lnum, vol->upd_buf, flush_len, -+ vol->upd_ebs); - if (err) - return err; - } -@@ -318,8 +347,8 @@ - - if (len == vol->usable_leb_size || - vol->upd_received + len == vol->upd_bytes) { -- err = write_leb(ubi, vol_id, lnum, vol->upd_buf, len, -- vol->upd_ebs); -+ err = write_leb(ubi, vol, lnum, vol->upd_buf, -+ len, vol->upd_ebs); - if (err) - break; - } -@@ -333,16 +362,72 @@ - ubi_assert(vol->upd_received <= vol->upd_bytes); - if (vol->upd_received == vol->upd_bytes) { - /* The update is finished, clear the update marker */ -- err = clear_update_marker(ubi, vol_id, vol->upd_bytes); -+ err = clear_update_marker(ubi, vol, vol->upd_bytes); - if (err) - return err; - err = ubi_wl_flush(ubi); - if (err == 0) { -+ vol->updating = 0; - err = to_write; - vfree(vol->upd_buf); -- vol->updating = 0; - } - } - - return err; - } -+ -+/** -+ * ubi_more_leb_change_data - accept more data for atomic LEB change. -+ * @ubi: UBI device description object -+ * @vol: volume description object -+ * @buf: write data (user-space memory buffer) -+ * @count: how much bytes to write -+ * -+ * This function accepts more data to the volume which is being under the -+ * "atomic LEB change" operation. It may be called arbitrary number of times -+ * until all data arrives. This function returns %0 in case of success, number -+ * of bytes written during the last call if the whole "atomic LEB change" -+ * operation has been successfully finished, and a negative error code in case -+ * of failure. 
-+ */ -+int ubi_more_leb_change_data(struct ubi_device *ubi, struct ubi_volume *vol, -+ const void __user *buf, int count) -+{ -+ int err; -+ -+ dbg_gen("write %d of %lld bytes, %lld already passed", -+ count, vol->upd_bytes, vol->upd_received); -+ -+ if (ubi->ro_mode) -+ return -EROFS; -+ -+ if (vol->upd_received + count > vol->upd_bytes) -+ count = vol->upd_bytes - vol->upd_received; -+ -+ err = copy_from_user(vol->upd_buf + vol->upd_received, buf, count); -+ if (err) -+ return -EFAULT; -+ -+ vol->upd_received += count; -+ -+ if (vol->upd_received == vol->upd_bytes) { -+ int len = ALIGN((int)vol->upd_bytes, ubi->min_io_size); -+ -+ memset(vol->upd_buf + vol->upd_bytes, 0xFF, -+ len - vol->upd_bytes); -+ len = ubi_calc_data_len(ubi, vol->upd_buf, len); -+ err = ubi_eba_atomic_leb_change(ubi, vol, vol->ch_lnum, -+ vol->upd_buf, len, UBI_UNKNOWN); -+ if (err) -+ return err; -+ } -+ -+ ubi_assert(vol->upd_received <= vol->upd_bytes); -+ if (vol->upd_received == vol->upd_bytes) { -+ vol->changing_leb = 0; -+ err = count; -+ vfree(vol->upd_buf); -+ } -+ -+ return err; -+} -diff -Nurd linux-2.6.24.orig/drivers/mtd/ubi/vmt.c linux-2.6.24/drivers/mtd/ubi/vmt.c ---- linux-2.6.24.orig/drivers/mtd/ubi/vmt.c 2009-04-17 09:45:11.000000000 +0200 -+++ linux-2.6.24/drivers/mtd/ubi/vmt.c 2009-04-17 09:49:26.000000000 +0200 -@@ -24,13 +24,12 @@ - */ - - #include <linux/err.h> --#include <asm/div64.h> +-#include <linux/math64.h> #include "ubi.h" - #ifdef CONFIG_MTD_UBI_DEBUG_PARANOID --static void paranoid_check_volumes(struct ubi_device *ubi); -+static int paranoid_check_volumes(struct ubi_device *ubi); - #else --#define paranoid_check_volumes(ubi) -+#define paranoid_check_volumes(ubi) 0 - #endif - - static ssize_t vol_attribute_show(struct device *dev, -@@ -63,21 +62,30 @@ - * B. process 2 removes volume Y; - * C. 
process 1 starts reading the /<sysfs>/class/ubi/ubiX_Y/reserved_ebs file; - * -- * What we want to do in a situation like that is to return error when the file -- * is read. This is done by means of the 'removed' flag and the 'vol_lock' of -- * the UBI volume description object. -+ * In this situation, this function will return %-ENODEV because it will find -+ * out that the volume was removed from the @ubi->volumes array. - */ - static ssize_t vol_attribute_show(struct device *dev, - struct device_attribute *attr, char *buf) - { - int ret; - struct ubi_volume *vol = container_of(dev, struct ubi_volume, dev); -+ struct ubi_device *ubi; - -- spin_lock(&vol->ubi->volumes_lock); -- if (vol->removed) { -- spin_unlock(&vol->ubi->volumes_lock); -+ ubi = ubi_get_device(vol->ubi->ubi_num); -+ if (!ubi) -+ return -ENODEV; -+ -+ spin_lock(&ubi->volumes_lock); -+ if (!ubi->volumes[vol->vol_id]) { -+ spin_unlock(&ubi->volumes_lock); -+ ubi_put_device(ubi); - return -ENODEV; - } -+ /* Take a reference to prevent volume removal */ -+ vol->ref_count += 1; -+ spin_unlock(&ubi->volumes_lock); -+ - if (attr == &attr_vol_reserved_ebs) - ret = sprintf(buf, "%d\n", vol->reserved_pebs); - else if (attr == &attr_vol_type) { -@@ -94,15 +102,22 @@ - ret = sprintf(buf, "%d\n", vol->corrupted); - else if (attr == &attr_vol_alignment) - ret = sprintf(buf, "%d\n", vol->alignment); -- else if (attr == &attr_vol_usable_eb_size) { -+ else if (attr == &attr_vol_usable_eb_size) - ret = sprintf(buf, "%d\n", vol->usable_leb_size); -- } else if (attr == &attr_vol_data_bytes) -+ else if (attr == &attr_vol_data_bytes) - ret = sprintf(buf, "%lld\n", vol->used_bytes); - else if (attr == &attr_vol_upd_marker) - ret = sprintf(buf, "%d\n", vol->upd_marker); - else -- BUG(); -- spin_unlock(&vol->ubi->volumes_lock); -+ /* This must be a bug */ -+ ret = -EINVAL; -+ -+ /* We've done the operation, drop volume and UBI device references */ -+ spin_lock(&ubi->volumes_lock); -+ vol->ref_count -= 1; -+ 
ubi_assert(vol->ref_count >= 0); -+ spin_unlock(&ubi->volumes_lock); -+ ubi_put_device(ubi); - return ret; - } - -@@ -110,7 +125,8 @@ - static void vol_release(struct device *dev) - { - struct ubi_volume *vol = container_of(dev, struct ubi_volume, dev); -- ubi_assert(vol->removed); -+ -+ kfree(vol->eba_tbl); - kfree(vol); - } - -@@ -152,9 +168,7 @@ - if (err) - return err; - err = device_create_file(&vol->dev, &attr_vol_upd_marker); -- if (err) -- return err; -- return 0; -+ return err; - } - - /** -@@ -180,16 +194,17 @@ - * @req: volume creation request - * - * This function creates volume described by @req. If @req->vol_id id -- * %UBI_VOL_NUM_AUTO, this function automatically assigne ID to the new volume -+ * %UBI_VOL_NUM_AUTO, this function automatically assign ID to the new volume - * and saves it in @req->vol_id. Returns zero in case of success and a negative -- * error code in case of failure. -+ * error code in case of failure. Note, the caller has to have the -+ * @ubi->volumes_mutex locked. 
- */ - int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req) - { -- int i, err, vol_id = req->vol_id; -+ int i, err, vol_id = req->vol_id, do_free = 1; - struct ubi_volume *vol; - struct ubi_vtbl_record vtbl_rec; -- uint64_t bytes; -+ dev_t dev; - - if (ubi->ro_mode) - return -EROFS; -@@ -199,10 +214,9 @@ - return -ENOMEM; - - spin_lock(&ubi->volumes_lock); -- - if (vol_id == UBI_VOL_NUM_AUTO) { - /* Find unused volume ID */ -- dbg_msg("search for vacant volume ID"); -+ dbg_gen("search for vacant volume ID"); - for (i = 0; i < ubi->vtbl_slots; i++) - if (!ubi->volumes[i]) { - vol_id = i; -@@ -217,7 +231,7 @@ - req->vol_id = vol_id; - } - -- dbg_msg("volume ID %d, %llu bytes, type %d, name %s", -+ dbg_gen("volume ID %d, %llu bytes, type %d, name %s", - vol_id, (unsigned long long)req->bytes, - (int)req->vol_type, req->name); - -@@ -237,12 +251,10 @@ - goto out_unlock; - } - -- /* Calculate how many eraseblocks are requested */ -+ /* Calculate how many eraseblocks are requested */ - vol->usable_leb_size = ubi->leb_size - ubi->leb_size % req->alignment; -- bytes = req->bytes; -- if (do_div(bytes, vol->usable_leb_size)) -- vol->reserved_pebs = 1; -- vol->reserved_pebs += bytes; -+ vol->reserved_pebs += div_u64(req->bytes + vol->usable_leb_size - 1, -+ vol->usable_leb_size); - - /* Reserve physical eraseblocks */ - if (vol->reserved_pebs > ubi->avail_pebs) { -@@ -252,17 +264,15 @@ - } - ubi->avail_pebs -= vol->reserved_pebs; - ubi->rsvd_pebs += vol->reserved_pebs; -+ spin_unlock(&ubi->volumes_lock); - - vol->vol_id = vol_id; - vol->alignment = req->alignment; - vol->data_pad = ubi->leb_size % vol->alignment; - vol->vol_type = req->vol_type; - vol->name_len = req->name_len; -- memcpy(vol->name, req->name, vol->name_len + 1); -- vol->exclusive = 1; -+ memcpy(vol->name, req->name, vol->name_len); - vol->ubi = ubi; -- ubi->volumes[vol_id] = vol; -- spin_unlock(&ubi->volumes_lock); - - /* - * Finish all pending erases because there may be some LEBs 
belonging -@@ -287,10 +297,10 @@ - vol->used_bytes = - (long long)vol->used_ebs * vol->usable_leb_size; - } else { -- bytes = vol->used_bytes; -- vol->last_eb_bytes = do_div(bytes, vol->usable_leb_size); -- vol->used_ebs = bytes; -- if (vol->last_eb_bytes) -+ vol->used_ebs = div_u64_rem(vol->used_bytes, -+ vol->usable_leb_size, -+ &vol->last_eb_bytes); -+ if (vol->last_eb_bytes != 0) - vol->used_ebs += 1; - else - vol->last_eb_bytes = vol->usable_leb_size; -@@ -299,9 +309,10 @@ - /* Register character device for the volume */ - cdev_init(&vol->cdev, &ubi_vol_cdev_operations); - vol->cdev.owner = THIS_MODULE; -- err = cdev_add(&vol->cdev, MKDEV(ubi->major, vol_id + 1), 1); -+ dev = MKDEV(MAJOR(ubi->cdev.dev), vol_id + 1); -+ err = cdev_add(&vol->cdev, dev, 1); - if (err) { -- ubi_err("cannot add character device for volume %d", vol_id); -+ ubi_err("cannot add character device"); - goto out_mapping; - } - -@@ -311,12 +322,15 @@ - - vol->dev.release = vol_release; - vol->dev.parent = &ubi->dev; -- vol->dev.devt = MKDEV(ubi->major, vol->vol_id + 1); -+ vol->dev.devt = dev; - vol->dev.class = ubi_class; -+ - sprintf(&vol->dev.bus_id[0], "%s_%d", ubi->ubi_name, vol->vol_id); - err = device_register(&vol->dev); -- if (err) -+ if (err) { -+ ubi_err("cannot register device"); - goto out_gluebi; -+ } - - err = volume_sysfs_init(ubi, vol); - if (err) -@@ -332,98 +346,108 @@ - vtbl_rec.vol_type = UBI_VID_DYNAMIC; - else - vtbl_rec.vol_type = UBI_VID_STATIC; -- memcpy(vtbl_rec.name, vol->name, vol->name_len + 1); -+ memcpy(vtbl_rec.name, vol->name, vol->name_len); - - err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec); - if (err) - goto out_sysfs; - - spin_lock(&ubi->volumes_lock); -+ ubi->volumes[vol_id] = vol; - ubi->vol_count += 1; -- vol->exclusive = 0; - spin_unlock(&ubi->volumes_lock); - -- paranoid_check_volumes(ubi); -- return 0; -+ err = paranoid_check_volumes(ubi); -+ return err; - -+out_sysfs: -+ /* -+ * We have registered our device, we should not free the volume 
-+ * description object in this function in case of an error - it is -+ * freed by the release function. -+ * -+ * Get device reference to prevent the release function from being -+ * called just after sysfs has been closed. -+ */ -+ do_free = 0; -+ get_device(&vol->dev); -+ volume_sysfs_close(vol); - out_gluebi: -- err = ubi_destroy_gluebi(vol); -+ if (ubi_destroy_gluebi(vol)) -+ dbg_err("cannot destroy gluebi for volume %d:%d", -+ ubi->ubi_num, vol_id); - out_cdev: - cdev_del(&vol->cdev); - out_mapping: -- kfree(vol->eba_tbl); -+ if (do_free) -+ kfree(vol->eba_tbl); - out_acc: - spin_lock(&ubi->volumes_lock); - ubi->rsvd_pebs -= vol->reserved_pebs; - ubi->avail_pebs += vol->reserved_pebs; -- ubi->volumes[vol_id] = NULL; - out_unlock: - spin_unlock(&ubi->volumes_lock); -- kfree(vol); -- return err; -- -- /* -- * We are registered, so @vol is destroyed in the release function and -- * we have to de-initialize differently. -- */ --out_sysfs: -- err = ubi_destroy_gluebi(vol); -- cdev_del(&vol->cdev); -- kfree(vol->eba_tbl); -- spin_lock(&ubi->volumes_lock); -- ubi->rsvd_pebs -= vol->reserved_pebs; -- ubi->avail_pebs += vol->reserved_pebs; -- ubi->volumes[vol_id] = NULL; -- spin_unlock(&ubi->volumes_lock); -- volume_sysfs_close(vol); -+ if (do_free) -+ kfree(vol); -+ else -+ put_device(&vol->dev); -+ ubi_err("cannot create volume %d, error %d", vol_id, err); - return err; - } - - /** - * ubi_remove_volume - remove volume. - * @desc: volume descriptor -+ * @no_vtbl: do not change volume table if not zero - * - * This function removes volume described by @desc. The volume has to be opened - * in "exclusive" mode. Returns zero in case of success and a negative error -- * code in case of failure. -+ * code in case of failure. The caller has to have the @ubi->volumes_mutex -+ * locked. 
- */ --int ubi_remove_volume(struct ubi_volume_desc *desc) -+int ubi_remove_volume(struct ubi_volume_desc *desc, int no_vtbl) - { - struct ubi_volume *vol = desc->vol; - struct ubi_device *ubi = vol->ubi; - int i, err, vol_id = vol->vol_id, reserved_pebs = vol->reserved_pebs; - -- dbg_msg("remove UBI volume %d", vol_id); -+ dbg_gen("remove UBI volume %d", vol_id); - ubi_assert(desc->mode == UBI_EXCLUSIVE); - ubi_assert(vol == ubi->volumes[vol_id]); - - if (ubi->ro_mode) - return -EROFS; - -+ spin_lock(&ubi->volumes_lock); -+ if (vol->ref_count > 1) { -+ /* -+ * The volume is busy, probably someone is reading one of its -+ * sysfs files. -+ */ -+ err = -EBUSY; -+ goto out_unlock; -+ } -+ ubi->volumes[vol_id] = NULL; -+ spin_unlock(&ubi->volumes_lock); -+ - err = ubi_destroy_gluebi(vol); - if (err) -- return err; -+ goto out_err; - -- err = ubi_change_vtbl_record(ubi, vol_id, NULL); -- if (err) -- return err; -+ if (!no_vtbl) { -+ err = ubi_change_vtbl_record(ubi, vol_id, NULL); -+ if (err) -+ goto out_err; -+ } - - for (i = 0; i < vol->reserved_pebs; i++) { -- err = ubi_eba_unmap_leb(ubi, vol_id, i); -+ err = ubi_eba_unmap_leb(ubi, vol, i); - if (err) -- return err; -+ goto out_err; - } - -- spin_lock(&ubi->volumes_lock); -- vol->removed = 1; -- ubi->volumes[vol_id] = NULL; -- spin_unlock(&ubi->volumes_lock); -- -- kfree(vol->eba_tbl); -- vol->eba_tbl = NULL; - cdev_del(&vol->cdev); - volume_sysfs_close(vol); -- kfree(desc); - - spin_lock(&ubi->volumes_lock); - ubi->rsvd_pebs -= reserved_pebs; -@@ -440,9 +464,17 @@ - ubi->vol_count -= 1; - spin_unlock(&ubi->volumes_lock); - -- paranoid_check_volumes(ubi); -- module_put(THIS_MODULE); -- return 0; -+ if (!no_vtbl) -+ err = paranoid_check_volumes(ubi); -+ return err; -+ -+out_err: -+ ubi_err("cannot remove volume %d, error %d", vol_id, err); -+ spin_lock(&ubi->volumes_lock); -+ ubi->volumes[vol_id] = vol; -+out_unlock: -+ spin_unlock(&ubi->volumes_lock); -+ return err; - } - - /** -@@ -450,8 +482,9 @@ - * @desc: volume 
descriptor - * @reserved_pebs: new size in physical eraseblocks - * -- * This function returns zero in case of success, and a negative error code in -- * case of failure. -+ * This function re-sizes the volume and returns zero in case of success, and a -+ * negative error code in case of failure. The caller has to have the -+ * @ubi->volumes_mutex locked. - */ - int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs) - { -@@ -464,10 +497,8 @@ - if (ubi->ro_mode) - return -EROFS; - -- dbg_msg("re-size volume %d to from %d to %d PEBs", -+ dbg_gen("re-size volume %d to from %d to %d PEBs", - vol_id, vol->reserved_pebs, reserved_pebs); -- ubi_assert(desc->mode == UBI_EXCLUSIVE); -- ubi_assert(vol == ubi->volumes[vol_id]); - - if (vol->vol_type == UBI_STATIC_VOLUME && - reserved_pebs < vol->used_ebs) { -@@ -487,6 +518,14 @@ - for (i = 0; i < reserved_pebs; i++) - new_mapping[i] = UBI_LEB_UNMAPPED; - -+ spin_lock(&ubi->volumes_lock); -+ if (vol->ref_count > 1) { -+ spin_unlock(&ubi->volumes_lock); -+ err = -EBUSY; -+ goto out_free; -+ } -+ spin_unlock(&ubi->volumes_lock); -+ - /* Reserve physical eraseblocks */ - pebs = reserved_pebs - vol->reserved_pebs; - if (pebs > 0) { -@@ -516,7 +555,7 @@ - - if (pebs < 0) { - for (i = 0; i < -pebs; i++) { -- err = ubi_eba_unmap_leb(ubi, vol_id, reserved_pebs + i); -+ err = ubi_eba_unmap_leb(ubi, vol, reserved_pebs + i); - if (err) - goto out_acc; - } -@@ -547,8 +586,8 @@ - (long long)vol->used_ebs * vol->usable_leb_size; - } - -- paranoid_check_volumes(ubi); -- return 0; -+ err = paranoid_check_volumes(ubi); -+ return err; - - out_acc: - if (pebs > 0) { -@@ -563,29 +602,67 @@ - } - - /** -+ * ubi_rename_volumes - re-name UBI volumes. -+ * @ubi: UBI device description object -+ * @rename_list: list of &struct ubi_rename_entry objects -+ * -+ * This function re-names or removes volumes specified in the re-name list. -+ * Returns zero in case of success and a negative error code in case of -+ * failure. 
-+ */ -+int ubi_rename_volumes(struct ubi_device *ubi, struct list_head *rename_list) -+{ -+ int err; -+ struct ubi_rename_entry *re; -+ -+ err = ubi_vtbl_rename_volumes(ubi, rename_list); -+ if (err) -+ return err; -+ -+ list_for_each_entry(re, rename_list, list) { -+ if (re->remove) { -+ err = ubi_remove_volume(re->desc, 1); -+ if (err) -+ break; -+ } else { -+ struct ubi_volume *vol = re->desc->vol; -+ -+ spin_lock(&ubi->volumes_lock); -+ vol->name_len = re->new_name_len; -+ memcpy(vol->name, re->new_name, re->new_name_len + 1); -+ spin_unlock(&ubi->volumes_lock); -+ } -+ } -+ -+ if (!err) -+ err = paranoid_check_volumes(ubi); -+ return err; -+} -+ -+/** - * ubi_add_volume - add volume. - * @ubi: UBI device description object -- * @vol_id: volume ID -+ * @vol: volume description object - * -- * This function adds an existin volume and initializes all its data -- * structures. Returnes zero in case of success and a negative error code in -+ * This function adds an existing volume and initializes all its data -+ * structures. Returns zero in case of success and a negative error code in - * case of failure. 
- */ --int ubi_add_volume(struct ubi_device *ubi, int vol_id) -+int ubi_add_volume(struct ubi_device *ubi, struct ubi_volume *vol) - { -- int err; -- struct ubi_volume *vol = ubi->volumes[vol_id]; -+ int err, vol_id = vol->vol_id; -+ dev_t dev; - -- dbg_msg("add volume %d", vol_id); -- ubi_dbg_dump_vol_info(vol); -- ubi_assert(vol); -+ dbg_gen("add volume %d", vol_id); - - /* Register character device for the volume */ - cdev_init(&vol->cdev, &ubi_vol_cdev_operations); - vol->cdev.owner = THIS_MODULE; -- err = cdev_add(&vol->cdev, MKDEV(ubi->major, vol->vol_id + 1), 1); -+ dev = MKDEV(MAJOR(ubi->cdev.dev), vol->vol_id + 1); -+ err = cdev_add(&vol->cdev, dev, 1); - if (err) { -- ubi_err("cannot add character device for volume %d", vol_id); -+ ubi_err("cannot add character device for volume %d, error %d", -+ vol_id, err); - return err; - } - -@@ -595,7 +672,7 @@ - - vol->dev.release = vol_release; - vol->dev.parent = &ubi->dev; -- vol->dev.devt = MKDEV(ubi->major, vol->vol_id + 1); -+ vol->dev.devt = dev; - vol->dev.class = ubi_class; - sprintf(&vol->dev.bus_id[0], "%s_%d", ubi->ubi_name, vol->vol_id); - err = device_register(&vol->dev); -@@ -610,8 +687,8 @@ - return err; - } - -- paranoid_check_volumes(ubi); -- return 0; -+ err = paranoid_check_volumes(ubi); -+ return err; - - out_gluebi: - err = ubi_destroy_gluebi(vol); -@@ -623,22 +700,19 @@ - /** - * ubi_free_volume - free volume. - * @ubi: UBI device description object -- * @vol_id: volume ID -+ * @vol: volume description object - * -- * This function frees all resources for volume @vol_id but does not remove it. -+ * This function frees all resources for volume @vol but does not remove it. - * Used only when the UBI device is detached. 
- */ --void ubi_free_volume(struct ubi_device *ubi, int vol_id) -+void ubi_free_volume(struct ubi_device *ubi, struct ubi_volume *vol) - { - int err; -- struct ubi_volume *vol = ubi->volumes[vol_id]; - -- dbg_msg("free volume %d", vol_id); -- ubi_assert(vol); -+ dbg_gen("free volume %d", vol->vol_id); - -- vol->removed = 1; -+ ubi->volumes[vol->vol_id] = NULL; - err = ubi_destroy_gluebi(vol); -- ubi->volumes[vol_id] = NULL; - cdev_del(&vol->cdev); - volume_sysfs_close(vol); - } -@@ -649,8 +723,10 @@ - * paranoid_check_volume - check volume information. - * @ubi: UBI device description object - * @vol_id: volume ID -+ * -+ * Returns zero if volume is all right and a a negative error code if not. - */ --static void paranoid_check_volume(struct ubi_device *ubi, int vol_id) -+static int paranoid_check_volume(struct ubi_device *ubi, int vol_id) - { - int idx = vol_id2idx(ubi, vol_id); - int reserved_pebs, alignment, data_pad, vol_type, name_len, upd_marker; -@@ -668,16 +744,7 @@ - goto fail; - } - spin_unlock(&ubi->volumes_lock); -- return; -- } -- -- if (vol->exclusive) { -- /* -- * The volume may be being created at the moment, do not check -- * it (e.g., it may be in the middle of ubi_create_volume(). 
-- */ -- spin_unlock(&ubi->volumes_lock); -- return; -+ return 0; - } - - if (vol->reserved_pebs < 0 || vol->alignment < 0 || vol->data_pad < 0 || -@@ -690,7 +757,7 @@ - goto fail; - } - -- n = vol->alignment % ubi->min_io_size; -+ n = vol->alignment & (ubi->min_io_size - 1); - if (vol->alignment != 1 && n) { - ubi_err("alignment is not multiple of min I/O unit"); - goto fail; -@@ -708,11 +775,6 @@ - goto fail; - } - -- if (vol->upd_marker != 0 && vol->upd_marker != 1) { -- ubi_err("bad upd_marker"); -- goto fail; -- } -- - if (vol->upd_marker && vol->corrupted) { - dbg_err("update marker and corrupted simultaneously"); - goto fail; -@@ -747,7 +809,7 @@ - - n = (long long)vol->used_ebs * vol->usable_leb_size; - if (vol->vol_type == UBI_DYNAMIC_VOLUME) { -- if (vol->corrupted != 0) { -+ if (vol->corrupted) { - ubi_err("corrupted dynamic volume"); - goto fail; - } -@@ -764,10 +826,6 @@ - goto fail; - } - } else { -- if (vol->corrupted != 0 && vol->corrupted != 1) { -- ubi_err("bad corrupted"); -- goto fail; -- } - if (vol->used_ebs < 0 || vol->used_ebs > vol->reserved_pebs) { - ubi_err("bad used_ebs"); - goto fail; -@@ -796,33 +854,39 @@ - - if (alignment != vol->alignment || data_pad != vol->data_pad || - upd_marker != vol->upd_marker || vol_type != vol->vol_type || -- name_len!= vol->name_len || strncmp(name, vol->name, name_len)) { -+ name_len != vol->name_len || strncmp(name, vol->name, name_len)) { - ubi_err("volume info is different"); - goto fail; - } - - spin_unlock(&ubi->volumes_lock); -- return; -+ return 0; - - fail: - ubi_err("paranoid check failed for volume %d", vol_id); -- ubi_dbg_dump_vol_info(vol); -+ if (vol) -+ ubi_dbg_dump_vol_info(vol); - ubi_dbg_dump_vtbl_record(&ubi->vtbl[vol_id], vol_id); - spin_unlock(&ubi->volumes_lock); -- BUG(); -+ return -EINVAL; - } - - /** - * paranoid_check_volumes - check information about all volumes. 
- * @ubi: UBI device description object -+ * -+ * Returns zero if volumes are all right and a a negative error code if not. - */ --static void paranoid_check_volumes(struct ubi_device *ubi) -+static int paranoid_check_volumes(struct ubi_device *ubi) - { -- int i; -+ int i, err = 0; - -- mutex_lock(&ubi->vtbl_mutex); -- for (i = 0; i < ubi->vtbl_slots; i++) -- paranoid_check_volume(ubi, i); -- mutex_unlock(&ubi->vtbl_mutex); -+ for (i = 0; i < ubi->vtbl_slots; i++) { -+ err = paranoid_check_volume(ubi, i); -+ if (err) -+ break; -+ } -+ -+ return err; - } - #endif -diff -Nurd linux-2.6.24.orig/drivers/mtd/ubi/vtbl.c linux-2.6.24/drivers/mtd/ubi/vtbl.c ---- linux-2.6.24.orig/drivers/mtd/ubi/vtbl.c 2009-04-17 09:45:11.000000000 +0200 -+++ linux-2.6.24/drivers/mtd/ubi/vtbl.c 2009-04-17 09:49:26.000000000 +0200 -@@ -86,8 +86,10 @@ - { - int i, err; - uint32_t crc; -+ struct ubi_volume *layout_vol; - - ubi_assert(idx >= 0 && idx < ubi->vtbl_slots); -+ layout_vol = ubi->volumes[vol_id2idx(ubi, UBI_LAYOUT_VOLUME_ID)]; - - if (!vtbl_rec) - vtbl_rec = &empty_vtbl_record; -@@ -96,31 +98,75 @@ - vtbl_rec->crc = cpu_to_be32(crc); - } - -- mutex_lock(&ubi->vtbl_mutex); - memcpy(&ubi->vtbl[idx], vtbl_rec, sizeof(struct ubi_vtbl_record)); - for (i = 0; i < UBI_LAYOUT_VOLUME_EBS; i++) { -- err = ubi_eba_unmap_leb(ubi, UBI_LAYOUT_VOL_ID, i); -- if (err) { -- mutex_unlock(&ubi->vtbl_mutex); -+ err = ubi_eba_unmap_leb(ubi, layout_vol, i); -+ if (err) - return err; -- } -- err = ubi_eba_write_leb(ubi, UBI_LAYOUT_VOL_ID, i, ubi->vtbl, 0, -+ -+ err = ubi_eba_write_leb(ubi, layout_vol, i, ubi->vtbl, 0, - ubi->vtbl_size, UBI_LONGTERM); -- if (err) { -- mutex_unlock(&ubi->vtbl_mutex); -+ if (err) - return err; -- } - } - - paranoid_vtbl_check(ubi); -- mutex_unlock(&ubi->vtbl_mutex); -- return ubi_wl_flush(ubi); -+ return 0; - } - - /** -- * vol_til_check - check if volume table is not corrupted and contains sensible -- * data. 
-+ * ubi_vtbl_rename_volumes - rename UBI volumes in the volume table. -+ * @ubi: UBI device description object -+ * @rename_list: list of &struct ubi_rename_entry objects - * -+ * This function re-names multiple volumes specified in @req in the volume -+ * table. Returns zero in case of success and a negative error code in case of -+ * failure. -+ */ -+int ubi_vtbl_rename_volumes(struct ubi_device *ubi, -+ struct list_head *rename_list) -+{ -+ int i, err; -+ struct ubi_rename_entry *re; -+ struct ubi_volume *layout_vol; -+ -+ list_for_each_entry(re, rename_list, list) { -+ uint32_t crc; -+ struct ubi_volume *vol = re->desc->vol; -+ struct ubi_vtbl_record *vtbl_rec = &ubi->vtbl[vol->vol_id]; -+ -+ if (re->remove) { -+ memcpy(vtbl_rec, &empty_vtbl_record, -+ sizeof(struct ubi_vtbl_record)); -+ continue; -+ } -+ -+ vtbl_rec->name_len = cpu_to_be16(re->new_name_len); -+ memcpy(vtbl_rec->name, re->new_name, re->new_name_len); -+ memset(vtbl_rec->name + re->new_name_len, 0, -+ UBI_VOL_NAME_MAX + 1 - re->new_name_len); -+ crc = crc32(UBI_CRC32_INIT, vtbl_rec, -+ UBI_VTBL_RECORD_SIZE_CRC); -+ vtbl_rec->crc = cpu_to_be32(crc); -+ } -+ -+ layout_vol = ubi->volumes[vol_id2idx(ubi, UBI_LAYOUT_VOLUME_ID)]; -+ for (i = 0; i < UBI_LAYOUT_VOLUME_EBS; i++) { -+ err = ubi_eba_unmap_leb(ubi, layout_vol, i); -+ if (err) -+ return err; -+ -+ err = ubi_eba_write_leb(ubi, layout_vol, i, ubi->vtbl, 0, -+ ubi->vtbl_size, UBI_LONGTERM); -+ if (err) -+ return err; -+ } -+ -+ return 0; -+} -+ -+/** -+ * vtbl_check - check if volume table is not corrupted and sensible. 
- * @ubi: UBI device description object - * @vtbl: volume table - * -@@ -131,7 +177,7 @@ - const struct ubi_vtbl_record *vtbl) - { - int i, n, reserved_pebs, alignment, data_pad, vol_type, name_len; -- int upd_marker; -+ int upd_marker, err; - uint32_t crc; - const char *name; - -@@ -157,7 +203,7 @@ - if (reserved_pebs == 0) { - if (memcmp(&vtbl[i], &empty_vtbl_record, - UBI_VTBL_RECORD_SIZE)) { -- dbg_err("bad empty record"); -+ err = 2; - goto bad; - } - continue; -@@ -165,56 +211,57 @@ - - if (reserved_pebs < 0 || alignment < 0 || data_pad < 0 || - name_len < 0) { -- dbg_err("negative values"); -+ err = 3; - goto bad; - } - - if (alignment > ubi->leb_size || alignment == 0) { -- dbg_err("bad alignment"); -+ err = 4; - goto bad; - } - -- n = alignment % ubi->min_io_size; -+ n = alignment & (ubi->min_io_size - 1); - if (alignment != 1 && n) { -- dbg_err("alignment is not multiple of min I/O unit"); -+ err = 5; - goto bad; - } - - n = ubi->leb_size % alignment; - if (data_pad != n) { - dbg_err("bad data_pad, has to be %d", n); -+ err = 6; - goto bad; - } - - if (vol_type != UBI_VID_DYNAMIC && vol_type != UBI_VID_STATIC) { -- dbg_err("bad vol_type"); -+ err = 7; - goto bad; - } - - if (upd_marker != 0 && upd_marker != 1) { -- dbg_err("bad upd_marker"); -+ err = 8; - goto bad; - } - - if (reserved_pebs > ubi->good_peb_count) { -- dbg_err("too large reserved_pebs, good PEBs %d", -- ubi->good_peb_count); -+ dbg_err("too large reserved_pebs %d, good PEBs %d", -+ reserved_pebs, ubi->good_peb_count); -+ err = 9; - goto bad; - } - - if (name_len > UBI_VOL_NAME_MAX) { -- dbg_err("too long volume name, max %d", -- UBI_VOL_NAME_MAX); -+ err = 10; - goto bad; - } - - if (name[0] == '\0') { -- dbg_err("NULL volume name"); -+ err = 11; - goto bad; - } - - if (name_len != strnlen(name, name_len + 1)) { -- dbg_err("bad name_len"); -+ err = 12; - goto bad; - } - } -@@ -239,7 +286,7 @@ - return 0; - - bad: -- ubi_err("volume table check failed, record %d", i); -+ ubi_err("volume 
table check failed: record %d, error %d", i, err); - ubi_dbg_dump_vtbl_record(&vtbl[i], i); - return -EINVAL; - } -@@ -273,7 +320,7 @@ - * this volume table copy was found during scanning. It has to be wiped - * out. - */ -- sv = ubi_scan_find_sv(si, UBI_LAYOUT_VOL_ID); -+ sv = ubi_scan_find_sv(si, UBI_LAYOUT_VOLUME_ID); - if (sv) - old_seb = ubi_scan_find_seb(sv, copy); - -@@ -285,13 +332,12 @@ - } - - vid_hdr->vol_type = UBI_VID_DYNAMIC; -- vid_hdr->vol_id = cpu_to_be32(UBI_LAYOUT_VOL_ID); -+ vid_hdr->vol_id = cpu_to_be32(UBI_LAYOUT_VOLUME_ID); - vid_hdr->compat = UBI_LAYOUT_VOLUME_COMPAT; - vid_hdr->data_size = vid_hdr->used_ebs = - vid_hdr->data_pad = cpu_to_be32(0); - vid_hdr->lnum = cpu_to_be32(copy); - vid_hdr->sqnum = cpu_to_be64(++si->max_sqnum); -- vid_hdr->leb_ver = cpu_to_be32(old_seb ? old_seb->leb_ver + 1: 0); - - /* The EC header is already there, write the VID header */ - err = ubi_io_write_vid_hdr(ubi, new_seb->pnum, vid_hdr); -@@ -374,7 +420,7 @@ - * to LEB 0. - */ - -- dbg_msg("check layout volume"); -+ dbg_gen("check layout volume"); - - /* Read both LEB 0 and LEB 1 into memory */ - ubi_rb_for_each_entry(rb, seb, &sv->root, u.rb) { -@@ -388,7 +434,16 @@ - err = ubi_io_read_data(ubi, leb[seb->lnum], seb->pnum, 0, - ubi->vtbl_size); - if (err == UBI_IO_BITFLIPS || err == -EBADMSG) -- /* Scrub the PEB later */ -+ /* -+ * Scrub the PEB later. Note, -EBADMSG indicates an -+ * uncorrectable ECC error, but we have our own CRC and -+ * the data will be checked later. If the data is OK, -+ * the PEB will be scrubbed (because we set -+ * seb->scrub). If the data is not OK, the contents of -+ * the PEB will be recovered from the second copy, and -+ * seb->scrub will be cleared in -+ * 'ubi_scan_add_used()'. 
-+ */ - seb->scrub = 1; - else if (err) - goto out_free; -@@ -404,7 +459,8 @@ - if (!leb_corrupted[0]) { - /* LEB 0 is OK */ - if (leb[1]) -- leb_corrupted[1] = memcmp(leb[0], leb[1], ubi->vtbl_size); -+ leb_corrupted[1] = memcmp(leb[0], leb[1], -+ ubi->vtbl_size); - if (leb_corrupted[1]) { - ubi_warn("volume table copy #2 is corrupted"); - err = create_vtbl(ubi, si, 1, leb[0]); -@@ -518,6 +574,17 @@ - vol->name[vol->name_len] = '\0'; - vol->vol_id = i; - -+ if (vtbl[i].flags & UBI_VTBL_AUTORESIZE_FLG) { -+ /* Auto re-size flag may be set only for one volume */ -+ if (ubi->autoresize_vol_id != -1) { -+ ubi_err("more then one auto-resize volume (%d " -+ "and %d)", ubi->autoresize_vol_id, i); -+ return -EINVAL; -+ } -+ -+ ubi->autoresize_vol_id = i; -+ } -+ - ubi_assert(!ubi->volumes[i]); - ubi->volumes[i] = vol; - ubi->vol_count += 1; -@@ -568,6 +635,7 @@ - vol->last_eb_bytes = sv->last_data_size; - } - -+ /* And add the layout volume */ - vol = kzalloc(sizeof(struct ubi_volume), GFP_KERNEL); - if (!vol) - return -ENOMEM; -@@ -582,7 +650,8 @@ - vol->last_eb_bytes = vol->reserved_pebs; - vol->used_bytes = - (long long)vol->used_ebs * (ubi->leb_size - vol->data_pad); -- vol->vol_id = UBI_LAYOUT_VOL_ID; -+ vol->vol_id = UBI_LAYOUT_VOLUME_ID; -+ vol->ref_count = 1; - - ubi_assert(!ubi->volumes[i]); - ubi->volumes[vol_id2idx(ubi, vol->vol_id)] = vol; -@@ -610,30 +679,32 @@ - static int check_sv(const struct ubi_volume *vol, - const struct ubi_scan_volume *sv) - { -+ int err; -+ - if (sv->highest_lnum >= vol->reserved_pebs) { -- dbg_err("bad highest_lnum"); -+ err = 1; - goto bad; - } - if (sv->leb_count > vol->reserved_pebs) { -- dbg_err("bad leb_count"); -+ err = 2; - goto bad; - } - if (sv->vol_type != vol->vol_type) { -- dbg_err("bad vol_type"); -+ err = 3; - goto bad; - } - if (sv->used_ebs > vol->reserved_pebs) { -- dbg_err("bad used_ebs"); -+ err = 4; - goto bad; - } - if (sv->data_pad != vol->data_pad) { -- dbg_err("bad data_pad"); -+ err = 5; - goto bad; - } - 
return 0; - - bad: -- ubi_err("bad scanning information"); -+ ubi_err("bad scanning information, error %d", err); - ubi_dbg_dump_sv(sv); - ubi_dbg_dump_vol_info(vol); - return -EINVAL; -@@ -662,14 +733,13 @@ - return -EINVAL; - } - -- if (si->highest_vol_id >= ubi->vtbl_slots + UBI_INT_VOL_COUNT&& -+ if (si->highest_vol_id >= ubi->vtbl_slots + UBI_INT_VOL_COUNT && - si->highest_vol_id < UBI_INTERNAL_VOL_START) { - ubi_err("too large volume ID %d found by scanning", - si->highest_vol_id); - return -EINVAL; - } - -- - for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) { - cond_resched(); - -@@ -707,8 +777,7 @@ - } - - /** -- * ubi_read_volume_table - read volume table. -- * information. -+ * ubi_read_volume_table - read the volume table. - * @ubi: UBI device description object - * @si: scanning information - * -@@ -734,7 +803,7 @@ - ubi->vtbl_size = ubi->vtbl_slots * UBI_VTBL_RECORD_SIZE; - ubi->vtbl_size = ALIGN(ubi->vtbl_size, ubi->min_io_size); - -- sv = ubi_scan_find_sv(si, UBI_LAYOUT_VOL_ID); -+ sv = ubi_scan_find_sv(si, UBI_LAYOUT_VOLUME_ID); - if (!sv) { - /* - * No logical eraseblocks belonging to the layout volume were -@@ -787,11 +856,10 @@ - - out_free: - vfree(ubi->vtbl); -- for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) -- if (ubi->volumes[i]) { -- kfree(ubi->volumes[i]); -- ubi->volumes[i] = NULL; -- } -+ for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) { -+ kfree(ubi->volumes[i]); -+ ubi->volumes[i] = NULL; -+ } - return err; - } - -diff -Nurd linux-2.6.24.orig/drivers/mtd/ubi/wl.c linux-2.6.24/drivers/mtd/ubi/wl.c ---- linux-2.6.24.orig/drivers/mtd/ubi/wl.c 2009-04-17 09:45:11.000000000 +0200 -+++ linux-2.6.24/drivers/mtd/ubi/wl.c 2009-04-17 09:49:26.000000000 +0200 -@@ -19,22 +19,22 @@ - */ - - /* -- * UBI wear-leveling unit. -+ * UBI wear-leveling sub-system. - * -- * This unit is responsible for wear-leveling. 
It works in terms of physical -- * eraseblocks and erase counters and knows nothing about logical eraseblocks, -- * volumes, etc. From this unit's perspective all physical eraseblocks are of -- * two types - used and free. Used physical eraseblocks are those that were -- * "get" by the 'ubi_wl_get_peb()' function, and free physical eraseblocks are -- * those that were put by the 'ubi_wl_put_peb()' function. -+ * This sub-system is responsible for wear-leveling. It works in terms of -+ * physical eraseblocks and erase counters and knows nothing about logical -+ * eraseblocks, volumes, etc. From this sub-system's perspective all physical -+ * eraseblocks are of two types - used and free. Used physical eraseblocks are -+ * those that were "get" by the 'ubi_wl_get_peb()' function, and free physical -+ * eraseblocks are those that were put by the 'ubi_wl_put_peb()' function. - * - * Physical eraseblocks returned by 'ubi_wl_get_peb()' have only erase counter -- * header. The rest of the physical eraseblock contains only 0xFF bytes. -+ * header. The rest of the physical eraseblock contains only %0xFF bytes. - * -- * When physical eraseblocks are returned to the WL unit by means of the -+ * When physical eraseblocks are returned to the WL sub-system by means of the - * 'ubi_wl_put_peb()' function, they are scheduled for erasure. The erasure is - * done asynchronously in context of the per-UBI device background thread, -- * which is also managed by the WL unit. -+ * which is also managed by the WL sub-system. - * - * The wear-leveling is ensured by means of moving the contents of used - * physical eraseblocks with low erase counter to free physical eraseblocks -@@ -43,34 +43,64 @@ - * The 'ubi_wl_get_peb()' function accepts data type hints which help to pick - * an "optimal" physical eraseblock. 
For example, when it is known that the - * physical eraseblock will be "put" soon because it contains short-term data, -- * the WL unit may pick a free physical eraseblock with low erase counter, and -- * so forth. -+ * the WL sub-system may pick a free physical eraseblock with low erase -+ * counter, and so forth. - * -- * If the WL unit fails to erase a physical eraseblock, it marks it as bad. -+ * If the WL sub-system fails to erase a physical eraseblock, it marks it as -+ * bad. - * -- * This unit is also responsible for scrubbing. If a bit-flip is detected in a -- * physical eraseblock, it has to be moved. Technically this is the same as -- * moving it for wear-leveling reasons. -+ * This sub-system is also responsible for scrubbing. If a bit-flip is detected -+ * in a physical eraseblock, it has to be moved. Technically this is the same -+ * as moving it for wear-leveling reasons. - * -- * As it was said, for the UBI unit all physical eraseblocks are either "free" -- * or "used". Free eraseblock are kept in the @wl->free RB-tree, while used -- * eraseblocks are kept in a set of different RB-trees: @wl->used, -- * @wl->prot.pnum, @wl->prot.aec, and @wl->scrub. -+ * As it was said, for the UBI sub-system all physical eraseblocks are either -+ * "free" or "used". Free eraseblock are kept in the @wl->free RB-tree, while -+ * used eraseblocks are kept in @wl->used or @wl->scrub RB-trees, or -+ * (temporarily) in the @wl->pq queue. -+ * -+ * When the WL sub-system returns a physical eraseblock, the physical -+ * eraseblock is protected from being moved for some "time". For this reason, -+ * the physical eraseblock is not directly moved from the @wl->free tree to the -+ * @wl->used tree. There is a protection queue in between where this -+ * physical eraseblock is temporarily stored (@wl->pq). 
-+ * -+ * All this protection stuff is needed because: -+ * o we don't want to move physical eraseblocks just after we have given them -+ * to the user; instead, we first want to let users fill them up with data; -+ * -+ * o there is a chance that the user will put the physical eraseblock very -+ * soon, so it makes sense not to move it for some time, but wait; this is -+ * especially important in case of "short term" physical eraseblocks. -+ * -+ * Physical eraseblocks stay protected only for limited time. But the "time" is -+ * measured in erase cycles in this case. This is implemented with help of the -+ * protection queue. Eraseblocks are put to the tail of this queue when they -+ * are returned by the 'ubi_wl_get_peb()', and eraseblocks are removed from the -+ * head of the queue on each erase operation (for any eraseblock). So the -+ * length of the queue defines how may (global) erase cycles PEBs are protected. -+ * -+ * To put it differently, each physical eraseblock has 2 main states: free and -+ * used. The former state corresponds to the @wl->free tree. The latter state -+ * is split up on several sub-states: -+ * o the WL movement is allowed (@wl->used tree); -+ * o the WL movement is temporarily prohibited (@wl->pq queue); -+ * o scrubbing is needed (@wl->scrub tree). -+ * -+ * Depending on the sub-state, wear-leveling entries of the used physical -+ * eraseblocks may be kept in one of those structures. - * - * Note, in this implementation, we keep a small in-RAM object for each physical - * eraseblock. This is surely not a scalable solution. But it appears to be good - * enough for moderately large flashes and it is simple. In future, one may -- * re-work this unit and make it more scalable. -+ * re-work this sub-system and make it more scalable. - * -- * At the moment this unit does not utilize the sequence number, which was -- * introduced relatively recently. 
But it would be wise to do this because the -- * sequence number of a logical eraseblock characterizes how old is it. For -+ * At the moment this sub-system does not utilize the sequence number, which -+ * was introduced relatively recently. But it would be wise to do this because -+ * the sequence number of a logical eraseblock characterizes how old is it. For - * example, when we move a PEB with low erase counter, and we need to pick the - * target PEB, we pick a PEB with the highest EC if our PEB is "old" and we - * pick target PEB with an average EC if our PEB is not very "old". This is a -- * room for future re-works of the WL unit. -- * -- * FIXME: looks too complex, should be simplified (later). -+ * room for future re-works of the WL sub-system. - */ - - #include <linux/slab.h> -@@ -83,29 +113,22 @@ - #define WL_RESERVED_PEBS 1 - - /* -- * How many erase cycles are short term, unknown, and long term physical -- * eraseblocks protected. -- */ --#define ST_PROTECTION 16 --#define U_PROTECTION 10 --#define LT_PROTECTION 4 -- --/* - * Maximum difference between two erase counters. If this threshold is -- * exceeded, the WL unit starts moving data from used physical eraseblocks with -- * low erase counter to free physical eraseblocks with high erase counter. -+ * exceeded, the WL sub-system starts moving data from used physical -+ * eraseblocks with low erase counter to free physical eraseblocks with high -+ * erase counter. - */ - #define UBI_WL_THRESHOLD CONFIG_MTD_UBI_WL_THRESHOLD - - /* -- * When a physical eraseblock is moved, the WL unit has to pick the target -+ * When a physical eraseblock is moved, the WL sub-system has to pick the target - * physical eraseblock to move to. The simplest way would be just to pick the - * one with the highest erase counter. But in certain workloads this could lead - * to an unlimited wear of one or few physical eraseblock. 
Indeed, imagine a - * situation when the picked physical eraseblock is constantly erased after the - * data is written to it. So, we have a constant which limits the highest erase -- * counter of the free physical eraseblock to pick. Namely, the WL unit does -- * not pick eraseblocks with erase counter greater then the lowest erase -+ * counter of the free physical eraseblock to pick. Namely, the WL sub-system -+ * does not pick eraseblocks with erase counter greater then the lowest erase - * counter plus %WL_FREE_MAX_DIFF. - */ - #define WL_FREE_MAX_DIFF (2*UBI_WL_THRESHOLD) -@@ -117,80 +140,9 @@ - #define WL_MAX_FAILURES 32 - - /** -- * struct ubi_wl_entry - wear-leveling entry. -- * @rb: link in the corresponding RB-tree -- * @ec: erase counter -- * @pnum: physical eraseblock number -- * -- * Each physical eraseblock has a corresponding &struct wl_entry object which -- * may be kept in different RB-trees. -- */ --struct ubi_wl_entry { -- struct rb_node rb; -- int ec; -- int pnum; --}; -- --/** -- * struct ubi_wl_prot_entry - PEB protection entry. -- * @rb_pnum: link in the @wl->prot.pnum RB-tree -- * @rb_aec: link in the @wl->prot.aec RB-tree -- * @abs_ec: the absolute erase counter value when the protection ends -- * @e: the wear-leveling entry of the physical eraseblock under protection -- * -- * When the WL unit returns a physical eraseblock, the physical eraseblock is -- * protected from being moved for some "time". For this reason, the physical -- * eraseblock is not directly moved from the @wl->free tree to the @wl->used -- * tree. There is one more tree in between where this physical eraseblock is -- * temporarily stored (@wl->prot). 
-- * -- * All this protection stuff is needed because: -- * o we don't want to move physical eraseblocks just after we have given them -- * to the user; instead, we first want to let users fill them up with data; -- * -- * o there is a chance that the user will put the physical eraseblock very -- * soon, so it makes sense not to move it for some time, but wait; this is -- * especially important in case of "short term" physical eraseblocks. -- * -- * Physical eraseblocks stay protected only for limited time. But the "time" is -- * measured in erase cycles in this case. This is implemented with help of the -- * absolute erase counter (@wl->abs_ec). When it reaches certain value, the -- * physical eraseblocks are moved from the protection trees (@wl->prot.*) to -- * the @wl->used tree. -- * -- * Protected physical eraseblocks are searched by physical eraseblock number -- * (when they are put) and by the absolute erase counter (to check if it is -- * time to move them to the @wl->used tree). So there are actually 2 RB-trees -- * storing the protected physical eraseblocks: @wl->prot.pnum and -- * @wl->prot.aec. They are referred to as the "protection" trees. The -- * first one is indexed by the physical eraseblock number. The second one is -- * indexed by the absolute erase counter. Both trees store -- * &struct ubi_wl_prot_entry objects. -- * -- * Each physical eraseblock has 2 main states: free and used. The former state -- * corresponds to the @wl->free tree. The latter state is split up on several -- * sub-states: -- * o the WL movement is allowed (@wl->used tree); -- * o the WL movement is temporarily prohibited (@wl->prot.pnum and -- * @wl->prot.aec trees); -- * o scrubbing is needed (@wl->scrub tree). -- * -- * Depending on the sub-state, wear-leveling entries of the used physical -- * eraseblocks may be kept in one of those trees. 
-- */ --struct ubi_wl_prot_entry { -- struct rb_node rb_pnum; -- struct rb_node rb_aec; -- unsigned long long abs_ec; -- struct ubi_wl_entry *e; --}; -- --/** - * struct ubi_work - UBI work description data structure. - * @list: a link in the list of pending works - * @func: worker function -- * @priv: private data of the worker function -- * - * @e: physical eraseblock to erase - * @torture: if the physical eraseblock has to be tortured - * -@@ -211,14 +163,13 @@ - static int paranoid_check_ec(struct ubi_device *ubi, int pnum, int ec); - static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e, - struct rb_root *root); -+static int paranoid_check_in_pq(struct ubi_device *ubi, struct ubi_wl_entry *e); - #else - #define paranoid_check_ec(ubi, pnum, ec) 0 - #define paranoid_check_in_wl_tree(e, root) -+#define paranoid_check_in_pq(ubi, e) 0 - #endif - --/* Slab cache for wear-leveling entries */ --static struct kmem_cache *wl_entries_slab; -- - /** - * wl_tree_add - add a wear-leveling entry to a WL RB-tree. - * @e: the wear-leveling entry to add -@@ -236,7 +187,7 @@ - struct ubi_wl_entry *e1; - - parent = *p; -- e1 = rb_entry(parent, struct ubi_wl_entry, rb); -+ e1 = rb_entry(parent, struct ubi_wl_entry, u.rb); - - if (e->ec < e1->ec) - p = &(*p)->rb_left; -@@ -251,8 +202,8 @@ - } - } - -- rb_link_node(&e->rb, parent, p); -- rb_insert_color(&e->rb, root); -+ rb_link_node(&e->u.rb, parent, p); -+ rb_insert_color(&e->u.rb, root); - } - - /** -@@ -267,15 +218,26 @@ - int err; - struct ubi_work *wrk; - -- spin_lock(&ubi->wl_lock); -+ cond_resched(); - -+ /* -+ * @ubi->work_sem is used to synchronize with the workers. Workers take -+ * it in read mode, so many of them may be doing works at a time. But -+ * the queue flush code has to be sure the whole queue of works is -+ * done, and it takes the mutex in write mode. 
-+ */ -+ down_read(&ubi->work_sem); -+ spin_lock(&ubi->wl_lock); - if (list_empty(&ubi->works)) { - spin_unlock(&ubi->wl_lock); -+ up_read(&ubi->work_sem); - return 0; - } - - wrk = list_entry(ubi->works.next, struct ubi_work, list); - list_del(&wrk->list); -+ ubi->works_count -= 1; -+ ubi_assert(ubi->works_count >= 0); - spin_unlock(&ubi->wl_lock); - - /* -@@ -286,11 +248,8 @@ - err = wrk->func(ubi, wrk, 0); - if (err) - ubi_err("work failed with error code %d", err); -+ up_read(&ubi->work_sem); - -- spin_lock(&ubi->wl_lock); -- ubi->works_count -= 1; -- ubi_assert(ubi->works_count >= 0); -- spin_unlock(&ubi->wl_lock); - return err; - } - -@@ -339,7 +298,7 @@ - while (p) { - struct ubi_wl_entry *e1; - -- e1 = rb_entry(p, struct ubi_wl_entry, rb); -+ e1 = rb_entry(p, struct ubi_wl_entry, u.rb); - - if (e->pnum == e1->pnum) { - ubi_assert(e == e1); -@@ -363,50 +322,24 @@ - } - - /** -- * prot_tree_add - add physical eraseblock to protection trees. -+ * prot_queue_add - add physical eraseblock to the protection queue. - * @ubi: UBI device description object - * @e: the physical eraseblock to add -- * @pe: protection entry object to use -- * @abs_ec: absolute erase counter value when this physical eraseblock has -- * to be removed from the protection trees. - * -- * @wl->lock has to be locked. -+ * This function adds @e to the tail of the protection queue @ubi->pq, where -+ * @e will stay for %UBI_PROT_QUEUE_LEN erase operations and will be -+ * temporarily protected from the wear-leveling worker. Note, @wl->lock has to -+ * be locked. 
- */ --static void prot_tree_add(struct ubi_device *ubi, struct ubi_wl_entry *e, -- struct ubi_wl_prot_entry *pe, int abs_ec) -+static void prot_queue_add(struct ubi_device *ubi, struct ubi_wl_entry *e) - { -- struct rb_node **p, *parent = NULL; -- struct ubi_wl_prot_entry *pe1; -- -- pe->e = e; -- pe->abs_ec = ubi->abs_ec + abs_ec; -- -- p = &ubi->prot.pnum.rb_node; -- while (*p) { -- parent = *p; -- pe1 = rb_entry(parent, struct ubi_wl_prot_entry, rb_pnum); -- -- if (e->pnum < pe1->e->pnum) -- p = &(*p)->rb_left; -- else -- p = &(*p)->rb_right; -- } -- rb_link_node(&pe->rb_pnum, parent, p); -- rb_insert_color(&pe->rb_pnum, &ubi->prot.pnum); -- -- p = &ubi->prot.aec.rb_node; -- parent = NULL; -- while (*p) { -- parent = *p; -- pe1 = rb_entry(parent, struct ubi_wl_prot_entry, rb_aec); -+ int pq_tail = ubi->pq_head - 1; - -- if (pe->abs_ec < pe1->abs_ec) -- p = &(*p)->rb_left; -- else -- p = &(*p)->rb_right; -- } -- rb_link_node(&pe->rb_aec, parent, p); -- rb_insert_color(&pe->rb_aec, &ubi->prot.aec); -+ if (pq_tail < 0) -+ pq_tail = UBI_PROT_QUEUE_LEN - 1; -+ ubi_assert(pq_tail >= 0 && pq_tail < UBI_PROT_QUEUE_LEN); -+ list_add_tail(&e->u.list, &ubi->pq[pq_tail]); -+ dbg_wl("added PEB %d EC %d to the protection queue", e->pnum, e->ec); - } - - /** -@@ -422,14 +355,14 @@ - struct rb_node *p; - struct ubi_wl_entry *e; - -- e = rb_entry(rb_first(root), struct ubi_wl_entry, rb); -+ e = rb_entry(rb_first(root), struct ubi_wl_entry, u.rb); - max += e->ec; - - p = root->rb_node; - while (p) { - struct ubi_wl_entry *e1; - -- e1 = rb_entry(p, struct ubi_wl_entry, rb); -+ e1 = rb_entry(p, struct ubi_wl_entry, u.rb); - if (e1->ec >= max) - p = p->rb_left; - else { -@@ -451,17 +384,12 @@ - */ - int ubi_wl_get_peb(struct ubi_device *ubi, int dtype) - { -- int err, protect, medium_ec; -+ int err, medium_ec; - struct ubi_wl_entry *e, *first, *last; -- struct ubi_wl_prot_entry *pe; - - ubi_assert(dtype == UBI_LONGTERM || dtype == UBI_SHORTTERM || - dtype == UBI_UNKNOWN); - -- pe = 
kmalloc(sizeof(struct ubi_wl_prot_entry), GFP_NOFS); -- if (!pe) -- return -ENOMEM; -- - retry: - spin_lock(&ubi->wl_lock); - if (!ubi->free.rb_node) { -@@ -469,110 +397,91 @@ - ubi_assert(list_empty(&ubi->works)); - ubi_err("no free eraseblocks"); - spin_unlock(&ubi->wl_lock); -- kfree(pe); - return -ENOSPC; - } - spin_unlock(&ubi->wl_lock); - - err = produce_free_peb(ubi); -- if (err < 0) { -- kfree(pe); -+ if (err < 0) - return err; -- } - goto retry; - } - - switch (dtype) { -- case UBI_LONGTERM: -- /* -- * For long term data we pick a physical eraseblock -- * with high erase counter. But the highest erase -- * counter we can pick is bounded by the the lowest -- * erase counter plus %WL_FREE_MAX_DIFF. -- */ -- e = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); -- protect = LT_PROTECTION; -- break; -- case UBI_UNKNOWN: -- /* -- * For unknown data we pick a physical eraseblock with -- * medium erase counter. But we by no means can pick a -- * physical eraseblock with erase counter greater or -- * equivalent than the lowest erase counter plus -- * %WL_FREE_MAX_DIFF. -- */ -- first = rb_entry(rb_first(&ubi->free), -- struct ubi_wl_entry, rb); -- last = rb_entry(rb_last(&ubi->free), -- struct ubi_wl_entry, rb); -+ case UBI_LONGTERM: -+ /* -+ * For long term data we pick a physical eraseblock with high -+ * erase counter. But the highest erase counter we can pick is -+ * bounded by the the lowest erase counter plus -+ * %WL_FREE_MAX_DIFF. -+ */ -+ e = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); -+ break; -+ case UBI_UNKNOWN: -+ /* -+ * For unknown data we pick a physical eraseblock with medium -+ * erase counter. But we by no means can pick a physical -+ * eraseblock with erase counter greater or equivalent than the -+ * lowest erase counter plus %WL_FREE_MAX_DIFF. 
-+ */ -+ first = rb_entry(rb_first(&ubi->free), struct ubi_wl_entry, -+ u.rb); -+ last = rb_entry(rb_last(&ubi->free), struct ubi_wl_entry, u.rb); - -- if (last->ec - first->ec < WL_FREE_MAX_DIFF) -- e = rb_entry(ubi->free.rb_node, -- struct ubi_wl_entry, rb); -- else { -- medium_ec = (first->ec + WL_FREE_MAX_DIFF)/2; -- e = find_wl_entry(&ubi->free, medium_ec); -- } -- protect = U_PROTECTION; -- break; -- case UBI_SHORTTERM: -- /* -- * For short term data we pick a physical eraseblock -- * with the lowest erase counter as we expect it will -- * be erased soon. -- */ -- e = rb_entry(rb_first(&ubi->free), -- struct ubi_wl_entry, rb); -- protect = ST_PROTECTION; -- break; -- default: -- protect = 0; -- e = NULL; -- BUG(); -+ if (last->ec - first->ec < WL_FREE_MAX_DIFF) -+ e = rb_entry(ubi->free.rb_node, -+ struct ubi_wl_entry, u.rb); -+ else { -+ medium_ec = (first->ec + WL_FREE_MAX_DIFF)/2; -+ e = find_wl_entry(&ubi->free, medium_ec); -+ } -+ break; -+ case UBI_SHORTTERM: -+ /* -+ * For short term data we pick a physical eraseblock with the -+ * lowest erase counter as we expect it will be erased soon. -+ */ -+ e = rb_entry(rb_first(&ubi->free), struct ubi_wl_entry, u.rb); -+ break; -+ default: -+ BUG(); - } - -+ paranoid_check_in_wl_tree(e, &ubi->free); -+ - /* -- * Move the physical eraseblock to the protection trees where it will -+ * Move the physical eraseblock to the protection queue where it will - * be protected from being moved for some time. - */ -- paranoid_check_in_wl_tree(e, &ubi->free); -- rb_erase(&e->rb, &ubi->free); -- prot_tree_add(ubi, e, pe, protect); -- -- dbg_wl("PEB %d EC %d, protection %d", e->pnum, e->ec, protect); -+ rb_erase(&e->u.rb, &ubi->free); -+ dbg_wl("PEB %d EC %d", e->pnum, e->ec); -+ prot_queue_add(ubi, e); - spin_unlock(&ubi->wl_lock); -- - return e->pnum; - } - - /** -- * prot_tree_del - remove a physical eraseblock from the protection trees -+ * prot_queue_del - remove a physical eraseblock from the protection queue. 
- * @ubi: UBI device description object - * @pnum: the physical eraseblock to remove -+ * -+ * This function deletes PEB @pnum from the protection queue and returns zero -+ * in case of success and %-ENODEV if the PEB was not found. - */ --static void prot_tree_del(struct ubi_device *ubi, int pnum) -+static int prot_queue_del(struct ubi_device *ubi, int pnum) - { -- struct rb_node *p; -- struct ubi_wl_prot_entry *pe = NULL; -- -- p = ubi->prot.pnum.rb_node; -- while (p) { -- -- pe = rb_entry(p, struct ubi_wl_prot_entry, rb_pnum); -+ struct ubi_wl_entry *e; - -- if (pnum == pe->e->pnum) -- break; -+ e = ubi->lookuptbl[pnum]; -+ if (!e) -+ return -ENODEV; - -- if (pnum < pe->e->pnum) -- p = p->rb_left; -- else -- p = p->rb_right; -- } -+ if (paranoid_check_in_pq(ubi, e)) -+ return -ENODEV; - -- ubi_assert(pe->e->pnum == pnum); -- rb_erase(&pe->rb_aec, &ubi->prot.aec); -- rb_erase(&pe->rb_pnum, &ubi->prot.pnum); -- kfree(pe); -+ list_del(&e->u.list); -+ dbg_wl("deleted PEB %d from the protection queue", e->pnum); -+ return 0; - } - - /** -@@ -584,7 +493,8 @@ - * This function returns zero in case of success and a negative error code in - * case of failure. - */ --static int sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, int torture) -+static int sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, -+ int torture) - { - int err; - struct ubi_ec_hdr *ec_hdr; -@@ -636,48 +546,47 @@ - } - - /** -- * check_protection_over - check if it is time to stop protecting some -- * physical eraseblocks. -+ * serve_prot_queue - check if it is time to stop protecting PEBs. - * @ubi: UBI device description object - * -- * This function is called after each erase operation, when the absolute erase -- * counter is incremented, to check if some physical eraseblock have not to be -- * protected any longer. These physical eraseblocks are moved from the -- * protection trees to the used tree. 
-+ * This function is called after each erase operation and removes PEBs from the -+ * tail of the protection queue. These PEBs have been protected for long enough -+ * and should be moved to the used tree. - */ --static void check_protection_over(struct ubi_device *ubi) -+static void serve_prot_queue(struct ubi_device *ubi) - { -- struct ubi_wl_prot_entry *pe; -+ struct ubi_wl_entry *e, *tmp; -+ int count; - - /* - * There may be several protected physical eraseblock to remove, - * process them all. - */ -- while (1) { -- spin_lock(&ubi->wl_lock); -- if (!ubi->prot.aec.rb_node) { -- spin_unlock(&ubi->wl_lock); -- break; -- } -- -- pe = rb_entry(rb_first(&ubi->prot.aec), -- struct ubi_wl_prot_entry, rb_aec); -+repeat: -+ count = 0; -+ spin_lock(&ubi->wl_lock); -+ list_for_each_entry_safe(e, tmp, &ubi->pq[ubi->pq_head], u.list) { -+ dbg_wl("PEB %d EC %d protection over, move to used tree", -+ e->pnum, e->ec); - -- if (pe->abs_ec > ubi->abs_ec) { -+ list_del(&e->u.list); -+ wl_tree_add(e, &ubi->used); -+ if (count++ > 32) { -+ /* -+ * Let's be nice and avoid holding the spinlock for -+ * too long. -+ */ - spin_unlock(&ubi->wl_lock); -- break; -+ cond_resched(); -+ goto repeat; - } -- -- dbg_wl("PEB %d protection over, abs_ec %llu, PEB abs_ec %llu", -- pe->e->pnum, ubi->abs_ec, pe->abs_ec); -- rb_erase(&pe->rb_aec, &ubi->prot.aec); -- rb_erase(&pe->rb_pnum, &ubi->prot.pnum); -- wl_tree_add(pe->e, &ubi->used); -- spin_unlock(&ubi->wl_lock); -- -- kfree(pe); -- cond_resched(); - } -+ -+ ubi->pq_head += 1; -+ if (ubi->pq_head == UBI_PROT_QUEUE_LEN) -+ ubi->pq_head = 0; -+ ubi_assert(ubi->pq_head >= 0 && ubi->pq_head < UBI_PROT_QUEUE_LEN); -+ spin_unlock(&ubi->wl_lock); - } - - /** -@@ -685,8 +594,8 @@ - * @ubi: UBI device description object - * @wrk: the work to schedule - * -- * This function enqueues a work defined by @wrk to the tail of the pending -- * works list. -+ * This function adds a work defined by @wrk to the tail of the pending works -+ * list. 
- */ - static void schedule_ubi_work(struct ubi_device *ubi, struct ubi_work *wrk) - { -@@ -744,12 +653,11 @@ - static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, - int cancel) - { -- int err, put = 0; -+ int err, scrubbing = 0, torture = 0; - struct ubi_wl_entry *e1, *e2; - struct ubi_vid_hdr *vid_hdr; - - kfree(wrk); -- - if (cancel) - return 0; - -@@ -757,21 +665,17 @@ - if (!vid_hdr) - return -ENOMEM; - -+ mutex_lock(&ubi->move_mutex); - spin_lock(&ubi->wl_lock); -+ ubi_assert(!ubi->move_from && !ubi->move_to); -+ ubi_assert(!ubi->move_to_put); - -- /* -- * Only one WL worker at a time is supported at this implementation, so -- * make sure a PEB is not being moved already. -- */ -- if (ubi->move_to || !ubi->free.rb_node || -+ if (!ubi->free.rb_node || - (!ubi->used.rb_node && !ubi->scrub.rb_node)) { - /* -- * Only one WL worker at a time is supported at this -- * implementation, so if a LEB is already being moved, cancel. -- * -- * No free physical eraseblocks? Well, we cancel wear-leveling -- * then. It will be triggered again when a free physical -- * eraseblock appears. -+ * No free physical eraseblocks? Well, they must be waiting in -+ * the queue to be erased. Cancel movement - it will be -+ * triggered again when a free physical eraseblock appears. - * - * No used physical eraseblocks? They must be temporarily - * protected from being moved. They will be moved to the -@@ -780,10 +684,7 @@ - */ - dbg_wl("cancel WL, a list is empty: free %d, used %d", - !ubi->free.rb_node, !ubi->used.rb_node); -- ubi->wl_scheduled = 0; -- spin_unlock(&ubi->wl_lock); -- ubi_free_vid_hdr(ubi, vid_hdr); -- return 0; -+ goto out_cancel; - } - - if (!ubi->scrub.rb_node) { -@@ -792,33 +693,30 @@ - * highly worn-out free physical eraseblock. If the erase - * counters differ much enough, start wear-leveling. 
- */ -- e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, rb); -+ e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb); - e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); - - if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) { - dbg_wl("no WL needed: min used EC %d, max free EC %d", - e1->ec, e2->ec); -- ubi->wl_scheduled = 0; -- spin_unlock(&ubi->wl_lock); -- ubi_free_vid_hdr(ubi, vid_hdr); -- return 0; -+ goto out_cancel; - } - paranoid_check_in_wl_tree(e1, &ubi->used); -- rb_erase(&e1->rb, &ubi->used); -+ rb_erase(&e1->u.rb, &ubi->used); - dbg_wl("move PEB %d EC %d to PEB %d EC %d", - e1->pnum, e1->ec, e2->pnum, e2->ec); - } else { -- e1 = rb_entry(rb_first(&ubi->scrub), struct ubi_wl_entry, rb); -+ /* Perform scrubbing */ -+ scrubbing = 1; -+ e1 = rb_entry(rb_first(&ubi->scrub), struct ubi_wl_entry, u.rb); - e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); - paranoid_check_in_wl_tree(e1, &ubi->scrub); -- rb_erase(&e1->rb, &ubi->scrub); -+ rb_erase(&e1->u.rb, &ubi->scrub); - dbg_wl("scrub PEB %d to PEB %d", e1->pnum, e2->pnum); - } - - paranoid_check_in_wl_tree(e2, &ubi->free); -- rb_erase(&e2->rb, &ubi->free); -- ubi_assert(!ubi->move_from && !ubi->move_to); -- ubi_assert(!ubi->move_to_put && !ubi->move_from_put); -+ rb_erase(&e2->u.rb, &ubi->free); - ubi->move_from = e1; - ubi->move_to = e2; - spin_unlock(&ubi->wl_lock); -@@ -828,6 +726,10 @@ - * We so far do not know which logical eraseblock our physical - * eraseblock (@e1) belongs to. We have to read the volume identifier - * header first. -+ * -+ * Note, we are protected from this PEB being unmapped and erased. The -+ * 'ubi_wl_put_peb()' would wait for moving to be finished if the PEB -+ * which is being moved was unmapped. - */ - - err = ubi_io_read_vid_hdr(ubi, e1->pnum, vid_hdr, 0); -@@ -842,97 +744,145 @@ - * likely have the VID header in place. 
- */ - dbg_wl("PEB %d has no VID header", e1->pnum); -- err = 0; -- } else { -- ubi_err("error %d while reading VID header from PEB %d", -- err, e1->pnum); -- if (err > 0) -- err = -EIO; -+ goto out_not_moved; - } -- goto error; -+ -+ ubi_err("error %d while reading VID header from PEB %d", -+ err, e1->pnum); -+ if (err > 0) -+ err = -EIO; -+ goto out_error; - } - - err = ubi_eba_copy_leb(ubi, e1->pnum, e2->pnum, vid_hdr); - if (err) { -- if (err == UBI_IO_BITFLIPS) -- err = 0; -- goto error; -+ if (err == -EAGAIN) -+ goto out_not_moved; -+ if (err < 0) -+ goto out_error; -+ if (err == 2) { -+ /* Target PEB write error, torture it */ -+ torture = 1; -+ goto out_not_moved; -+ } -+ -+ /* -+ * The LEB has not been moved because the volume is being -+ * deleted or the PEB has been put meanwhile. We should prevent -+ * this PEB from being selected for wear-leveling movement -+ * again, so put it to the protection queue. -+ */ -+ -+ dbg_wl("canceled moving PEB %d", e1->pnum); -+ ubi_assert(err == 1); -+ -+ ubi_free_vid_hdr(ubi, vid_hdr); -+ vid_hdr = NULL; -+ -+ spin_lock(&ubi->wl_lock); -+ prot_queue_add(ubi, e1); -+ ubi_assert(!ubi->move_to_put); -+ ubi->move_from = ubi->move_to = NULL; -+ ubi->wl_scheduled = 0; -+ spin_unlock(&ubi->wl_lock); -+ -+ e1 = NULL; -+ err = schedule_erase(ubi, e2, 0); -+ if (err) -+ goto out_error; -+ mutex_unlock(&ubi->move_mutex); -+ return 0; - } - -+ /* The PEB has been successfully moved */ - ubi_free_vid_hdr(ubi, vid_hdr); -+ vid_hdr = NULL; -+ if (scrubbing) -+ ubi_msg("scrubbed PEB %d, data moved to PEB %d", -+ e1->pnum, e2->pnum); -+ - spin_lock(&ubi->wl_lock); -- if (!ubi->move_to_put) -+ if (!ubi->move_to_put) { - wl_tree_add(e2, &ubi->used); -- else -- put = 1; -+ e2 = NULL; -+ } - ubi->move_from = ubi->move_to = NULL; -- ubi->move_from_put = ubi->move_to_put = 0; -- ubi->wl_scheduled = 0; -+ ubi->move_to_put = ubi->wl_scheduled = 0; - spin_unlock(&ubi->wl_lock); - -- if (put) { -+ err = schedule_erase(ubi, e1, 0); -+ if (err) { 
-+ e1 = NULL; -+ goto out_error; -+ } -+ -+ if (e2) { - /* - * Well, the target PEB was put meanwhile, schedule it for - * erasure. - */ - dbg_wl("PEB %d was put meanwhile, erase", e2->pnum); - err = schedule_erase(ubi, e2, 0); -- if (err) { -- kmem_cache_free(wl_entries_slab, e2); -- ubi_ro_mode(ubi); -- } -- } -- -- err = schedule_erase(ubi, e1, 0); -- if (err) { -- kmem_cache_free(wl_entries_slab, e1); -- ubi_ro_mode(ubi); -+ if (err) -+ goto out_error; - } - - dbg_wl("done"); -- return err; -+ mutex_unlock(&ubi->move_mutex); -+ return 0; - - /* -- * Some error occurred. @e1 was not changed, so return it back. @e2 -- * might be changed, schedule it for erasure. -+ * For some reasons the LEB was not moved, might be an error, might be -+ * something else. @e1 was not changed, so return it back. @e2 might -+ * have been changed, schedule it for erasure. - */ --error: -- if (err) -- dbg_wl("error %d occurred, cancel operation", err); -- ubi_assert(err <= 0); -- -+out_not_moved: -+ dbg_wl("canceled moving PEB %d", e1->pnum); - ubi_free_vid_hdr(ubi, vid_hdr); -+ vid_hdr = NULL; - spin_lock(&ubi->wl_lock); -- ubi->wl_scheduled = 0; -- if (ubi->move_from_put) -- put = 1; -+ if (scrubbing) -+ wl_tree_add(e1, &ubi->scrub); - else - wl_tree_add(e1, &ubi->used); -+ ubi_assert(!ubi->move_to_put); - ubi->move_from = ubi->move_to = NULL; -- ubi->move_from_put = ubi->move_to_put = 0; -+ ubi->wl_scheduled = 0; - spin_unlock(&ubi->wl_lock); - -- if (put) { -- /* -- * Well, the target PEB was put meanwhile, schedule it for -- * erasure. 
-- */ -- dbg_wl("PEB %d was put meanwhile, erase", e1->pnum); -- err = schedule_erase(ubi, e1, 0); -- if (err) { -- kmem_cache_free(wl_entries_slab, e1); -- ubi_ro_mode(ubi); -- } -- } -+ e1 = NULL; -+ err = schedule_erase(ubi, e2, torture); -+ if (err) -+ goto out_error; - -- err = schedule_erase(ubi, e2, 0); -- if (err) { -- kmem_cache_free(wl_entries_slab, e2); -- ubi_ro_mode(ubi); -- } -+ mutex_unlock(&ubi->move_mutex); -+ return 0; - -- yield(); -+out_error: -+ ubi_err("error %d while moving PEB %d to PEB %d", -+ err, e1->pnum, e2->pnum); -+ -+ ubi_free_vid_hdr(ubi, vid_hdr); -+ spin_lock(&ubi->wl_lock); -+ ubi->move_from = ubi->move_to = NULL; -+ ubi->move_to_put = ubi->wl_scheduled = 0; -+ spin_unlock(&ubi->wl_lock); -+ -+ if (e1) -+ kmem_cache_free(ubi_wl_entry_slab, e1); -+ if (e2) -+ kmem_cache_free(ubi_wl_entry_slab, e2); -+ ubi_ro_mode(ubi); -+ -+ mutex_unlock(&ubi->move_mutex); - return err; -+ -+out_cancel: -+ ubi->wl_scheduled = 0; -+ spin_unlock(&ubi->wl_lock); -+ mutex_unlock(&ubi->move_mutex); -+ ubi_free_vid_hdr(ubi, vid_hdr); -+ return 0; - } - - /** -@@ -970,7 +920,7 @@ - * erase counter of free physical eraseblocks is greater then - * %UBI_WL_THRESHOLD. - */ -- e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, rb); -+ e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb); - e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); - - if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) -@@ -1020,7 +970,7 @@ - if (cancel) { - dbg_wl("cancel erasure of PEB %d EC %d", pnum, e->ec); - kfree(wl_wrk); -- kmem_cache_free(wl_entries_slab, e); -+ kmem_cache_free(ubi_wl_entry_slab, e); - return 0; - } - -@@ -1032,15 +982,14 @@ - kfree(wl_wrk); - - spin_lock(&ubi->wl_lock); -- ubi->abs_ec += 1; - wl_tree_add(e, &ubi->free); - spin_unlock(&ubi->wl_lock); - - /* -- * One more erase operation has happened, take care about protected -- * physical eraseblocks. -+ * One more erase operation has happened, take care about -+ * protected physical eraseblocks. 
- */ -- check_protection_over(ubi); -+ serve_prot_queue(ubi); - - /* And take care about wear-leveling */ - err = ensure_wear_leveling(ubi); -@@ -1049,7 +998,7 @@ - - ubi_err("failed to erase PEB %d, error %d", pnum, err); - kfree(wl_wrk); -- kmem_cache_free(wl_entries_slab, e); -+ kmem_cache_free(ubi_wl_entry_slab, e); - - if (err == -EINTR || err == -ENOMEM || err == -EAGAIN || - err == -EBUSY) { -@@ -1119,8 +1068,7 @@ - } - /** -- * ubi_wl_put_peb - return a physical eraseblock to the wear-leveling -- * unit. -+ * ubi_wl_put_peb - return a PEB to the wear-leveling sub-system. - * @ubi: UBI device description object - * @pnum: physical eraseblock to return - * @torture: if this physical eraseblock has to be tortured -@@ -1128,7 +1076,7 @@ - * This function is called to return physical eraseblock @pnum to the pool of - * free physical eraseblocks. The @torture flag has to be set if an I/O error - * occurred to this @pnum and it has to be tested. This function returns zero -- * in case of success and a negative error code in case of failure. -+ * in case of success, and a negative error code in case of failure. - */ - int ubi_wl_put_peb(struct ubi_device *ubi, int pnum, int torture) - { -@@ -1139,8 +1087,8 @@ - ubi_assert(pnum >= 0); - ubi_assert(pnum < ubi->peb_count); - -+retry: - spin_lock(&ubi->wl_lock); -- - e = ubi->lookuptbl[pnum]; - if (e == ubi->move_from) { - /* -@@ -1148,17 +1096,22 @@ - * be moved. It will be scheduled for erasure in the - * wear-leveling worker. - */ -- dbg_wl("PEB %d is being moved", pnum); -- ubi_assert(!ubi->move_from_put); -- ubi->move_from_put = 1; -+ dbg_wl("PEB %d is being moved, wait", pnum); - spin_unlock(&ubi->wl_lock); -- return 0; -+ -+ /* Wait for the WL worker by taking the @ubi->move_mutex */ -+ mutex_lock(&ubi->move_mutex); -+ mutex_unlock(&ubi->move_mutex); -+ goto retry; - } else if (e == ubi->move_to) { - /* - * User is putting the physical eraseblock which was selected - * as the target the data is moved to. 
It may happen if the EBA -- * unit already re-mapped the LEB but the WL unit did has not -- * put the PEB to the "used" tree. -+ * sub-system already re-mapped the LEB in 'ubi_eba_copy_leb()' -+ * but the WL sub-system has not put the PEB to the "used" tree -+ * yet, but it is about to do this. So we just set a flag which -+ * will tell the WL worker that the PEB is not needed anymore -+ * and should be scheduled for erasure. - */ - dbg_wl("PEB %d is the target of data moving", pnum); - ubi_assert(!ubi->move_to_put); -@@ -1168,12 +1121,19 @@ - } else { - if (in_wl_tree(e, &ubi->used)) { - paranoid_check_in_wl_tree(e, &ubi->used); -- rb_erase(&e->rb, &ubi->used); -+ rb_erase(&e->u.rb, &ubi->used); - } else if (in_wl_tree(e, &ubi->scrub)) { - paranoid_check_in_wl_tree(e, &ubi->scrub); -- rb_erase(&e->rb, &ubi->scrub); -- } else -- prot_tree_del(ubi, e->pnum); -+ rb_erase(&e->u.rb, &ubi->scrub); -+ } else { -+ err = prot_queue_del(ubi, e->pnum); -+ if (err) { -+ ubi_err("PEB %d not found", pnum); -+ ubi_ro_mode(ubi); -+ spin_unlock(&ubi->wl_lock); -+ return err; -+ } -+ } - } - spin_unlock(&ubi->wl_lock); - -@@ -1201,7 +1161,7 @@ - { - struct ubi_wl_entry *e; - -- ubi_msg("schedule PEB %d for scrubbing", pnum); -+ dbg_msg("schedule PEB %d for scrubbing", pnum); - - retry: - spin_lock(&ubi->wl_lock); -@@ -1226,9 +1186,18 @@ - - if (in_wl_tree(e, &ubi->used)) { - paranoid_check_in_wl_tree(e, &ubi->used); -- rb_erase(&e->rb, &ubi->used); -- } else -- prot_tree_del(ubi, pnum); -+ rb_erase(&e->u.rb, &ubi->used); -+ } else { -+ int err; -+ -+ err = prot_queue_del(ubi, e->pnum); -+ if (err) { -+ ubi_err("PEB %d not found", pnum); -+ ubi_ro_mode(ubi); -+ spin_unlock(&ubi->wl_lock); -+ return err; -+ } -+ } - - wl_tree_add(e, &ubi->scrub); - spin_unlock(&ubi->wl_lock); -@@ -1249,17 +1218,32 @@ - */ - int ubi_wl_flush(struct ubi_device *ubi) - { -- int err, pending_count; -+ int err; - -- pending_count = ubi->works_count; -+ /* -+ * Erase while the pending works queue is not 
empty, but not more than -+ * the number of currently pending works. -+ */ -+ dbg_wl("flush (%d pending works)", ubi->works_count); -+ while (ubi->works_count) { -+ err = do_work(ubi); -+ if (err) -+ return err; -+ } - -- dbg_wl("flush (%d pending works)", pending_count); -+ /* -+ * Make sure all the works which have been done in parallel are -+ * finished. -+ */ -+ down_write(&ubi->work_sem); -+ up_write(&ubi->work_sem); - - /* -- * Erase while the pending works queue is not empty, but not more then -- * the number of currently pending works. -+ * And in case last was the WL worker and it canceled the LEB -+ * movement, flush again. - */ -- while (pending_count-- > 0) { -+ while (ubi->works_count) { -+ dbg_wl("flush more (%d pending works)", ubi->works_count); - err = do_work(ubi); - if (err) - return err; -@@ -1284,17 +1268,17 @@ - else if (rb->rb_right) - rb = rb->rb_right; - else { -- e = rb_entry(rb, struct ubi_wl_entry, rb); -+ e = rb_entry(rb, struct ubi_wl_entry, u.rb); - - rb = rb_parent(rb); - if (rb) { -- if (rb->rb_left == &e->rb) -+ if (rb->rb_left == &e->u.rb) - rb->rb_left = NULL; - else - rb->rb_right = NULL; - } - -- kmem_cache_free(wl_entries_slab, e); -+ kmem_cache_free(ubi_wl_entry_slab, e); - } - } - } -@@ -1303,7 +1287,7 @@ - * ubi_thread - UBI background thread. 
- * @u: the UBI device description object pointer - */ --static int ubi_thread(void *u) -+int ubi_thread(void *u) - { - int failures = 0; - struct ubi_device *ubi = u; -@@ -1316,7 +1300,7 @@ - int err; - - if (kthread_should_stop()) -- goto out; -+ break; - - if (try_to_freeze()) - continue; -@@ -1343,7 +1327,8 @@ - ubi_msg("%s: %d consecutive failures", - ubi->bgt_name, WL_MAX_FAILURES); - ubi_ro_mode(ubi); -- break; -+ ubi->thread_enabled = 0; -+ continue; - } - } else - failures = 0; -@@ -1351,7 +1336,6 @@ - cond_resched(); - } - --out: - dbg_wl("background thread \"%s\" is killed", ubi->bgt_name); - return 0; - } -@@ -1374,8 +1358,7 @@ - } - - /** -- * ubi_wl_init_scan - initialize the wear-leveling unit using scanning -- * information. -+ * ubi_wl_init_scan - initialize the WL sub-system using scanning information. - * @ubi: UBI device description object - * @si: scanning information - * -@@ -1384,46 +1367,34 @@ - */ - int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si) - { -- int err; -+ int err, i; - struct rb_node *rb1, *rb2; - struct ubi_scan_volume *sv; - struct ubi_scan_leb *seb, *tmp; - struct ubi_wl_entry *e; - -- - ubi->used = ubi->free = ubi->scrub = RB_ROOT; -- ubi->prot.pnum = ubi->prot.aec = RB_ROOT; - spin_lock_init(&ubi->wl_lock); -+ mutex_init(&ubi->move_mutex); -+ init_rwsem(&ubi->work_sem); - ubi->max_ec = si->max_ec; - INIT_LIST_HEAD(&ubi->works); - - sprintf(ubi->bgt_name, UBI_BGT_NAME_PATTERN, ubi->ubi_num); - -- ubi->bgt_thread = kthread_create(ubi_thread, ubi, ubi->bgt_name); -- if (IS_ERR(ubi->bgt_thread)) { -- err = PTR_ERR(ubi->bgt_thread); -- ubi_err("cannot spawn \"%s\", error %d", ubi->bgt_name, -- err); -- return err; -- } -- -- if (ubi_devices_cnt == 0) { -- wl_entries_slab = kmem_cache_create("ubi_wl_entry_slab", -- sizeof(struct ubi_wl_entry), -- 0, 0, NULL); -- if (!wl_entries_slab) -- return -ENOMEM; -- } -- - err = -ENOMEM; - ubi->lookuptbl = kzalloc(ubi->peb_count * sizeof(void *), GFP_KERNEL); - if 
(!ubi->lookuptbl) -- goto out_free; -+ return err; -+ -+ for (i = 0; i < UBI_PROT_QUEUE_LEN; i++) -+ INIT_LIST_HEAD(&ubi->pq[i]); -+ ubi->pq_head = 0; - - list_for_each_entry_safe(seb, tmp, &si->erase, u.list) { - cond_resched(); - -- e = kmem_cache_alloc(wl_entries_slab, GFP_KERNEL); -+ e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); - if (!e) - goto out_free; - -@@ -1431,7 +1402,7 @@ - e->ec = seb->ec; - ubi->lookuptbl[e->pnum] = e; - if (schedule_erase(ubi, e, 0)) { -- kmem_cache_free(wl_entries_slab, e); -+ kmem_cache_free(ubi_wl_entry_slab, e); - goto out_free; - } - } -@@ -1439,7 +1410,7 @@ - list_for_each_entry(seb, &si->free, u.list) { - cond_resched(); - -- e = kmem_cache_alloc(wl_entries_slab, GFP_KERNEL); -+ e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); - if (!e) - goto out_free; - -@@ -1453,7 +1424,7 @@ - list_for_each_entry(seb, &si->corr, u.list) { - cond_resched(); - -- e = kmem_cache_alloc(wl_entries_slab, GFP_KERNEL); -+ e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); - if (!e) - goto out_free; - -@@ -1461,7 +1432,7 @@ - e->ec = seb->ec; - ubi->lookuptbl[e->pnum] = e; - if (schedule_erase(ubi, e, 0)) { -- kmem_cache_free(wl_entries_slab, e); -+ kmem_cache_free(ubi_wl_entry_slab, e); - goto out_free; - } - } -@@ -1470,7 +1441,7 @@ - ubi_rb_for_each_entry(rb2, seb, &sv->root, u.rb) { - cond_resched(); - -- e = kmem_cache_alloc(wl_entries_slab, GFP_KERNEL); -+ e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); - if (!e) - goto out_free; - -@@ -1510,70 +1481,45 @@ - tree_destroy(&ubi->free); - tree_destroy(&ubi->scrub); - kfree(ubi->lookuptbl); -- if (ubi_devices_cnt == 0) -- kmem_cache_destroy(wl_entries_slab); - return err; - } - - /** -- * protection_trees_destroy - destroy the protection RB-trees. -+ * protection_queue_destroy - destroy the protection queue. 
- * @ubi: UBI device description object - */ --static void protection_trees_destroy(struct ubi_device *ubi) -+static void protection_queue_destroy(struct ubi_device *ubi) - { -- struct rb_node *rb; -- struct ubi_wl_prot_entry *pe; -- -- rb = ubi->prot.aec.rb_node; -- while (rb) { -- if (rb->rb_left) -- rb = rb->rb_left; -- else if (rb->rb_right) -- rb = rb->rb_right; -- else { -- pe = rb_entry(rb, struct ubi_wl_prot_entry, rb_aec); -- -- rb = rb_parent(rb); -- if (rb) { -- if (rb->rb_left == &pe->rb_aec) -- rb->rb_left = NULL; -- else -- rb->rb_right = NULL; -- } -+ int i; -+ struct ubi_wl_entry *e, *tmp; - -- kmem_cache_free(wl_entries_slab, pe->e); -- kfree(pe); -+ for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i) { -+ list_for_each_entry_safe(e, tmp, &ubi->pq[i], u.list) { -+ list_del(&e->u.list); -+ kmem_cache_free(ubi_wl_entry_slab, e); - } - } - } - - /** -- * ubi_wl_close - close the wear-leveling unit. -+ * ubi_wl_close - close the wear-leveling sub-system. - * @ubi: UBI device description object - */ - void ubi_wl_close(struct ubi_device *ubi) - { -- dbg_wl("disable \"%s\"", ubi->bgt_name); -- if (ubi->bgt_thread) -- kthread_stop(ubi->bgt_thread); -- -- dbg_wl("close the UBI wear-leveling unit"); -- -+ dbg_wl("close the WL sub-system"); - cancel_pending(ubi); -- protection_trees_destroy(ubi); -+ protection_queue_destroy(ubi); - tree_destroy(&ubi->used); - tree_destroy(&ubi->free); - tree_destroy(&ubi->scrub); - kfree(ubi->lookuptbl); -- if (ubi_devices_cnt == 1) -- kmem_cache_destroy(wl_entries_slab); - } - - #ifdef CONFIG_MTD_UBI_DEBUG_PARANOID - - /** -- * paranoid_check_ec - make sure that the erase counter of a physical eraseblock -- * is correct. -+ * paranoid_check_ec - make sure that the erase counter of a PEB is correct. 
- * @ubi: UBI device description object - * @pnum: the physical eraseblock number to check - * @ec: the erase counter to check -@@ -1614,13 +1560,12 @@ - } - - /** -- * paranoid_check_in_wl_tree - make sure that a wear-leveling entry is present -- * in a WL RB-tree. -+ * paranoid_check_in_wl_tree - check that wear-leveling entry is in WL RB-tree. - * @e: the wear-leveling entry to check - * @root: the root of the tree - * -- * This function returns zero if @e is in the @root RB-tree and %1 if it -- * is not. -+ * This function returns zero if @e is in the @root RB-tree and %1 if it is -+ * not. - */ - static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e, - struct rb_root *root) -@@ -1634,4 +1579,27 @@ - return 1; - } - -+/** -+ * paranoid_check_in_pq - check if wear-leveling entry is in the protection -+ * queue. -+ * @ubi: UBI device description object -+ * @e: the wear-leveling entry to check -+ * -+ * This function returns zero if @e is in @ubi->pq and %1 if it is not. -+ */ -+static int paranoid_check_in_pq(struct ubi_device *ubi, struct ubi_wl_entry *e) -+{ -+ struct ubi_wl_entry *p; -+ int i; -+ -+ for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i) -+ list_for_each_entry(p, &ubi->pq[i], u.list) -+ if (p == e) -+ return 0; -+ -+ ubi_err("paranoid check failed for PEB %d, EC %d, Protect queue", -+ e->pnum, e->ec); -+ ubi_dbg_dump_stack(); -+ return 1; -+} - #endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */ -diff -Nurd linux-2.6.24.orig/fs/Kconfig linux-2.6.24/fs/Kconfig ---- linux-2.6.24.orig/fs/Kconfig 2009-04-17 09:45:12.000000000 +0200 -+++ linux-2.6.24/fs/Kconfig 2009-04-17 09:49:26.000000000 +0200 -@@ -1385,6 +1385,9 @@ - - endchoice - -+# UBIFS File system configuration -+source "fs/ubifs/Kconfig" -+ - config CRAMFS - tristate "Compressed ROM file system support (cramfs)" - depends on BLOCK -diff -Nurd linux-2.6.24.orig/fs/Makefile linux-2.6.24/fs/Makefile ---- linux-2.6.24.orig/fs/Makefile 2009-04-17 09:45:12.000000000 +0200 -+++ linux-2.6.24/fs/Makefile 
2009-04-17 09:49:28.000000000 +0200 -@@ -100,6 +100,7 @@ - obj-$(CONFIG_UFS_FS) += ufs/ - obj-$(CONFIG_EFS_FS) += efs/ - obj-$(CONFIG_JFFS2_FS) += jffs2/ -+obj-$(CONFIG_UBIFS_FS) += ubifs/ - obj-$(CONFIG_AFFS_FS) += affs/ - obj-$(CONFIG_ROMFS_FS) += romfs/ - obj-$(CONFIG_QNX4FS_FS) += qnx4/ -diff -Nurd linux-2.6.24.orig/fs/fs-writeback.c linux-2.6.24/fs/fs-writeback.c ---- linux-2.6.24.orig/fs/fs-writeback.c 2009-04-17 09:45:12.000000000 +0200 -+++ linux-2.6.24/fs/fs-writeback.c 2009-04-17 09:49:28.000000000 +0200 -@@ -386,8 +386,6 @@ - * WB_SYNC_HOLD is a hack for sys_sync(): reattach the inode to sb->s_dirty so - * that it can be located for waiting on in __writeback_single_inode(). - * -- * Called under inode_lock. -- * - * If `bdi' is non-zero then we're being asked to writeback a specific queue. - * This function assumes that the blockdev superblock's inodes are backed by - * a variety of queues, so all inodes are searched. For other superblocks, -@@ -403,11 +401,12 @@ - * on the writer throttling path, and we get decent balancing between many - * throttled threads: we don't want them all piling up on inode_sync_wait. - */ --static void --sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc) -+void generic_sync_sb_inodes(struct super_block *sb, -+ struct writeback_control *wbc) - { - const unsigned long start = jiffies; /* livelock avoidance */ - -+ spin_lock(&inode_lock); - if (!wbc->for_kupdate || list_empty(&sb->s_io)) - queue_io(sb, wbc->older_than_this); - -@@ -482,8 +481,16 @@ - if (wbc->nr_to_write <= 0) - break; - } -+ spin_unlock(&inode_lock); - return; /* Leave any unwritten inodes on s_io */ - } -+EXPORT_SYMBOL_GPL(generic_sync_sb_inodes); -+ -+static void sync_sb_inodes(struct super_block *sb, -+ struct writeback_control *wbc) -+{ -+ generic_sync_sb_inodes(sb, wbc); -+} - - /* - * Start writeback of dirty pagecache data against all unlocked inodes. -@@ -524,11 +531,8 @@ - * be unmounted by the time it is released. 
- */ - if (down_read_trylock(&sb->s_umount)) { -- if (sb->s_root) { -- spin_lock(&inode_lock); -+ if (sb->s_root) - sync_sb_inodes(sb, wbc); -- spin_unlock(&inode_lock); -- } - up_read(&sb->s_umount); - } - spin_lock(&sb_lock); -@@ -566,9 +570,7 @@ - (inodes_stat.nr_inodes - inodes_stat.nr_unused) + - nr_dirty + nr_unstable; - wbc.nr_to_write += wbc.nr_to_write / 2; /* Bit more for luck */ -- spin_lock(&inode_lock); - sync_sb_inodes(sb, &wbc); -- spin_unlock(&inode_lock); - } - - /* -diff -Nurd linux-2.6.24.orig/fs/ubifs/Kconfig linux-2.6.24/fs/ubifs/Kconfig ---- linux-2.6.24.orig/fs/ubifs/Kconfig 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.24/fs/ubifs/Kconfig 2009-04-17 09:49:28.000000000 +0200 -@@ -0,0 +1,72 @@ -+config UBIFS_FS -+ tristate "UBIFS file system support" -+ select CRC16 -+ select CRC32 -+ select CRYPTO if UBIFS_FS_ADVANCED_COMPR -+ select CRYPTO if UBIFS_FS_LZO -+ select CRYPTO if UBIFS_FS_ZLIB -+ select CRYPTO_LZO if UBIFS_FS_LZO -+ select CRYPTO_DEFLATE if UBIFS_FS_ZLIB -+ depends on MTD_UBI -+ help -+ UBIFS is a file system for flash devices which works on top of UBI. -+ -+config UBIFS_FS_XATTR -+ bool "Extended attributes support" -+ depends on UBIFS_FS -+ help -+ This option enables support of extended attributes. -+ -+config UBIFS_FS_ADVANCED_COMPR -+ bool "Advanced compression options" -+ depends on UBIFS_FS -+ help -+ This option allows to explicitly choose which compressions, if any, -+ are enabled in UBIFS. Removing compressors means inbility to read -+ existing file systems. -+ -+ If unsure, say 'N'. -+ -+config UBIFS_FS_LZO -+ bool "LZO compression support" if UBIFS_FS_ADVANCED_COMPR -+ depends on UBIFS_FS -+ default y -+ help -+ LZO compressor is generally faster then zlib but compresses worse. -+ Say 'Y' if unsure. -+ -+config UBIFS_FS_ZLIB -+ bool "ZLIB compression support" if UBIFS_FS_ADVANCED_COMPR -+ depends on UBIFS_FS -+ default y -+ help -+ Zlib copresses better then LZO but it is slower. Say 'Y' if unsure. 
-+ -+# Debugging-related stuff -+config UBIFS_FS_DEBUG -+ bool "Enable debugging" -+ depends on UBIFS_FS -+ select DEBUG_FS -+ select KALLSYMS_ALL -+ help -+ This option enables UBIFS debugging. -+ -+config UBIFS_FS_DEBUG_MSG_LVL -+ int "Default message level (0 = no extra messages, 3 = lots)" -+ depends on UBIFS_FS_DEBUG -+ default "0" -+ help -+ This controls the amount of debugging messages produced by UBIFS. -+ If reporting bugs, please try to have available a full dump of the -+ messages at level 1 while the misbehaviour was occurring. Level 2 -+ may become necessary if level 1 messages were not enough to find the -+ bug. Generally Level 3 should be avoided. -+ -+config UBIFS_FS_DEBUG_CHKS -+ bool "Enable extra checks" -+ depends on UBIFS_FS_DEBUG -+ help -+ If extra checks are enabled UBIFS will check the consistency of its -+ internal data structures during operation. However, UBIFS performance -+ is dramatically slower when this option is selected especially if the -+ file system is large. -diff -Nurd linux-2.6.24.orig/fs/ubifs/Makefile linux-2.6.24/fs/ubifs/Makefile ---- linux-2.6.24.orig/fs/ubifs/Makefile 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.24/fs/ubifs/Makefile 2009-04-17 09:49:28.000000000 +0200 -@@ -0,0 +1,9 @@ -+obj-$(CONFIG_UBIFS_FS) += ubifs.o -+ -+ubifs-y += shrinker.o journal.o file.o dir.o super.o sb.o io.o -+ubifs-y += tnc.o master.o scan.o replay.o log.o commit.o gc.o orphan.o -+ubifs-y += budget.o find.o tnc_commit.o compress.o lpt.o lprops.o -+ubifs-y += recovery.o ioctl.o lpt_commit.o tnc_misc.o -+ -+ubifs-$(CONFIG_UBIFS_FS_DEBUG) += debug.o -+ubifs-$(CONFIG_UBIFS_FS_XATTR) += xattr.o -diff -Nurd linux-2.6.24.orig/fs/ubifs/budget.c linux-2.6.24/fs/ubifs/budget.c ---- linux-2.6.24.orig/fs/ubifs/budget.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.24/fs/ubifs/budget.c 2009-04-17 09:49:28.000000000 +0200 -@@ -0,0 +1,758 @@ -+/* -+ * This file is part of UBIFS. -+ * -+ * Copyright (C) 2006-2008 Nokia Corporation. 
-+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 as published by -+ * the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -+ * more details. -+ * -+ * You should have received a copy of the GNU General Public License along with -+ * this program; if not, write to the Free Software Foundation, Inc., 51 -+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ * -+ * Authors: Adrian Hunter -+ * Artem Bityutskiy (Битюцкий Артём) -+ */ -+ -+/* -+ * This file implements the budgeting sub-system which is responsible for UBIFS -+ * space management. -+ * -+ * Factors such as compression, wasted space at the ends of LEBs, space in other -+ * journal heads, the effect of updates on the index, and so on, make it -+ * impossible to accurately predict the amount of space needed. Consequently -+ * approximations are used. -+ */ -+ -+#include "ubifs.h" -+#include <linux/writeback.h> -+ -+/* -+ * When pessimistic budget calculations say that there is no enough space, -+ * UBIFS starts writing back dirty inodes and pages, doing garbage collection, -+ * or committing. The below constant defines maximum number of times UBIFS -+ * repeats the operations. -+ */ -+#define MAX_MKSPC_RETRIES 3 -+ -+/* -+ * The below constant defines amount of dirty pages which should be written -+ * back at when trying to shrink the liability. -+ */ -+#define NR_TO_WRITE 16 -+ -+/** -+ * shrink_liability - write-back some dirty pages/inodes. -+ * @c: UBIFS file-system description object -+ * @nr_to_write: how many dirty pages to write-back -+ * -+ * This function shrinks UBIFS liability by means of writing back some amount -+ * of dirty inodes and their pages. 
Returns the amount of pages which were -+ * written back. The returned value does not include dirty inodes which were -+ * synchronized. -+ * -+ * Note, this function synchronizes even VFS inodes which are locked -+ * (@i_mutex) by the caller of the budgeting function, because write-back does -+ * not touch @i_mutex. -+ */ -+static int shrink_liability(struct ubifs_info *c, int nr_to_write) -+{ -+ int nr_written; -+ struct writeback_control wbc = { -+ .sync_mode = WB_SYNC_NONE, -+ .range_end = LLONG_MAX, -+ .nr_to_write = nr_to_write, -+ }; -+ -+ generic_sync_sb_inodes(c->vfs_sb, &wbc); -+ nr_written = nr_to_write - wbc.nr_to_write; -+ -+ if (!nr_written) { -+ /* -+ * Re-try again but wait on pages/inodes which are being -+ * written-back concurrently (e.g., by pdflush). -+ */ -+ memset(&wbc, 0, sizeof(struct writeback_control)); -+ wbc.sync_mode = WB_SYNC_ALL; -+ wbc.range_end = LLONG_MAX; -+ wbc.nr_to_write = nr_to_write; -+ generic_sync_sb_inodes(c->vfs_sb, &wbc); -+ nr_written = nr_to_write - wbc.nr_to_write; -+ } -+ -+ dbg_budg("%d pages were written back", nr_written); -+ return nr_written; -+} -+ -+ -+/** -+ * run_gc - run garbage collector. -+ * @c: UBIFS file-system description object -+ * -+ * This function runs garbage collector to make some more free space. Returns -+ * zero if a free LEB has been produced, %-EAGAIN if commit is required, and a -+ * negative error code in case of failure. -+ */ -+static int run_gc(struct ubifs_info *c) -+{ -+ int err, lnum; -+ -+ /* Make some free space by garbage-collecting dirty space */ -+ down_read(&c->commit_sem); -+ lnum = ubifs_garbage_collect(c, 1); -+ up_read(&c->commit_sem); -+ if (lnum < 0) -+ return lnum; -+ -+ /* GC freed one LEB, return it to lprops */ -+ dbg_budg("GC freed LEB %d", lnum); -+ err = ubifs_return_leb(c, lnum); -+ if (err) -+ return err; -+ return 0; -+} -+ -+/** -+ * get_liability - calculate current liability. 
-+ * @c: UBIFS file-system description object -+ * -+ * This function calculates and returns current UBIFS liability, i.e. the -+ * amount of bytes UBIFS has "promised" to write to the media. -+ */ -+static long long get_liability(struct ubifs_info *c) -+{ -+ long long liab; -+ -+ spin_lock(&c->space_lock); -+ liab = c->budg_idx_growth + c->budg_data_growth + c->budg_dd_growth; -+ spin_unlock(&c->space_lock); -+ return liab; -+} -+ -+/** -+ * make_free_space - make more free space on the file-system. -+ * @c: UBIFS file-system description object -+ * -+ * This function is called when an operation cannot be budgeted because there -+ * is supposedly no free space. But in most cases there is some free space: -+ * o budgeting is pessimistic, so it always budgets more then it is actually -+ * needed, so shrinking the liability is one way to make free space - the -+ * cached data will take less space then it was budgeted for; -+ * o GC may turn some dark space into free space (budgeting treats dark space -+ * as not available); -+ * o commit may free some LEB, i.e., turn freeable LEBs into free LEBs. -+ * -+ * So this function tries to do the above. Returns %-EAGAIN if some free space -+ * was presumably made and the caller has to re-try budgeting the operation. -+ * Returns %-ENOSPC if it couldn't do more free space, and other negative error -+ * codes on failures. -+ */ -+static int make_free_space(struct ubifs_info *c) -+{ -+ int err, retries = 0; -+ long long liab1, liab2; -+ -+ do { -+ liab1 = get_liability(c); -+ /* -+ * We probably have some dirty pages or inodes (liability), try -+ * to write them back. 
-+ */ -+ dbg_budg("liability %lld, run write-back", liab1); -+ shrink_liability(c, NR_TO_WRITE); -+ -+ liab2 = get_liability(c); -+ if (liab2 < liab1) -+ return -EAGAIN; -+ -+ dbg_budg("new liability %lld (not shrinked)", liab2); -+ -+ /* Liability did not shrink again, try GC */ -+ dbg_budg("Run GC"); -+ err = run_gc(c); -+ if (!err) -+ return -EAGAIN; -+ -+ if (err != -EAGAIN && err != -ENOSPC) -+ /* Some real error happened */ -+ return err; -+ -+ dbg_budg("Run commit (retries %d)", retries); -+ err = ubifs_run_commit(c); -+ if (err) -+ return err; -+ } while (retries++ < MAX_MKSPC_RETRIES); -+ -+ return -ENOSPC; -+} -+ -+/** -+ * ubifs_calc_min_idx_lebs - calculate amount of eraseblocks for the index. -+ * @c: UBIFS file-system description object -+ * -+ * This function calculates and returns the number of eraseblocks which should -+ * be kept for index usage. -+ */ -+int ubifs_calc_min_idx_lebs(struct ubifs_info *c) -+{ -+ int idx_lebs, eff_leb_size = c->leb_size - c->max_idx_node_sz; -+ long long idx_size; -+ -+ idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx; -+ -+ /* And make sure we have thrice the index size of space reserved */ -+ idx_size = idx_size + (idx_size << 1); -+ -+ /* -+ * We do not maintain 'old_idx_size' as 'old_idx_lebs'/'old_idx_bytes' -+ * pair, nor similarly the two variables for the new index size, so we -+ * have to do this costly 64-bit division on fast-path. -+ */ -+ idx_size += eff_leb_size - 1; -+ idx_lebs = div_u64(idx_size, eff_leb_size); -+ /* -+ * The index head is not available for the in-the-gaps method, so add an -+ * extra LEB to compensate. -+ */ -+ idx_lebs += 1; -+ if (idx_lebs < MIN_INDEX_LEBS) -+ idx_lebs = MIN_INDEX_LEBS; -+ return idx_lebs; -+} -+ -+/** -+ * ubifs_calc_available - calculate available FS space. 
-+ * @c: UBIFS file-system description object -+ * @min_idx_lebs: minimum number of LEBs reserved for the index -+ * -+ * This function calculates and returns amount of FS space available for use. -+ */ -+long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs) -+{ -+ int subtract_lebs; -+ long long available; -+ -+ available = c->main_bytes - c->lst.total_used; -+ -+ /* -+ * Now 'available' contains theoretically available flash space -+ * assuming there is no index, so we have to subtract the space which -+ * is reserved for the index. -+ */ -+ subtract_lebs = min_idx_lebs; -+ -+ /* Take into account that GC reserves one LEB for its own needs */ -+ subtract_lebs += 1; -+ -+ /* -+ * The GC journal head LEB is not really accessible. And since -+ * different write types go to different heads, we may count only on -+ * one head's space. -+ */ -+ subtract_lebs += c->jhead_cnt - 1; -+ -+ /* We also reserve one LEB for deletions, which bypass budgeting */ -+ subtract_lebs += 1; -+ -+ available -= (long long)subtract_lebs * c->leb_size; -+ -+ /* Subtract the dead space which is not available for use */ -+ available -= c->lst.total_dead; -+ -+ /* -+ * Subtract dark space, which might or might not be usable - it depends -+ * on the data which we have on the media and which will be written. If -+ * this is a lot of uncompressed or not-compressible data, the dark -+ * space cannot be used. -+ */ -+ available -= c->lst.total_dark; -+ -+ /* -+ * However, there is more dark space. The index may be bigger than -+ * @min_idx_lebs. Those extra LEBs are assumed to be available, but -+ * their dark space is not included in total_dark, so it is subtracted -+ * here. -+ */ -+ if (c->lst.idx_lebs > min_idx_lebs) { -+ subtract_lebs = c->lst.idx_lebs - min_idx_lebs; -+ available -= subtract_lebs * c->dark_wm; -+ } -+ -+ /* The calculations are rough and may end up with a negative number */ -+ return available > 0 ? 
available : 0; -+} -+ -+/** -+ * can_use_rp - check whether the user is allowed to use reserved pool. -+ * @c: UBIFS file-system description object -+ * -+ * UBIFS has so-called "reserved pool" which is flash space reserved -+ * for the superuser and for uses whose UID/GID is recorded in UBIFS superblock. -+ * This function checks whether current user is allowed to use reserved pool. -+ * Returns %1 current user is allowed to use reserved pool and %0 otherwise. -+ */ -+static int can_use_rp(struct ubifs_info *c) -+{ -+ if (current->fsuid == c->rp_uid || capable(CAP_SYS_RESOURCE) || -+ (c->rp_gid != 0 && in_group_p(c->rp_gid))) -+ return 1; -+ return 0; -+} -+ -+/** -+ * do_budget_space - reserve flash space for index and data growth. -+ * @c: UBIFS file-system description object -+ * -+ * This function makes sure UBIFS has enough free eraseblocks for index growth -+ * and data. -+ * -+ * When budgeting index space, UBIFS reserves thrice as many LEBs as the index -+ * would take if it was consolidated and written to the flash. This guarantees -+ * that the "in-the-gaps" commit method always succeeds and UBIFS will always -+ * be able to commit dirty index. So this function basically adds amount of -+ * budgeted index space to the size of the current index, multiplies this by 3, -+ * and makes sure this does not exceed the amount of free eraseblocks. -+ * -+ * Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables: -+ * o @c->lst.idx_lebs is the number of LEBs the index currently uses. It might -+ * be large, because UBIFS does not do any index consolidation as long as -+ * there is free space. IOW, the index may take a lot of LEBs, but the LEBs -+ * will contain a lot of dirt. -+ * o @c->min_idx_lebs is the the index presumably takes. IOW, the index may be -+ * consolidated to take up to @c->min_idx_lebs LEBs. -+ * -+ * This function returns zero in case of success, and %-ENOSPC in case of -+ * failure. 
-+ */ -+static int do_budget_space(struct ubifs_info *c) -+{ -+ long long outstanding, available; -+ int lebs, rsvd_idx_lebs, min_idx_lebs; -+ -+ /* First budget index space */ -+ min_idx_lebs = ubifs_calc_min_idx_lebs(c); -+ -+ /* Now 'min_idx_lebs' contains number of LEBs to reserve */ -+ if (min_idx_lebs > c->lst.idx_lebs) -+ rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs; -+ else -+ rsvd_idx_lebs = 0; -+ -+ /* -+ * The number of LEBs that are available to be used by the index is: -+ * -+ * @c->lst.empty_lebs + @c->freeable_cnt + @c->idx_gc_cnt - -+ * @c->lst.taken_empty_lebs -+ * -+ * @c->lst.empty_lebs are available because they are empty. -+ * @c->freeable_cnt are available because they contain only free and -+ * dirty space, @c->idx_gc_cnt are available because they are index -+ * LEBs that have been garbage collected and are awaiting the commit -+ * before they can be used. And the in-the-gaps method will grab these -+ * if it needs them. @c->lst.taken_empty_lebs are empty LEBs that have -+ * already been allocated for some purpose. -+ * -+ * Note, @c->idx_gc_cnt is included to both @c->lst.empty_lebs (because -+ * these LEBs are empty) and to @c->lst.taken_empty_lebs (because they -+ * are taken until after the commit). -+ * -+ * Note, @c->lst.taken_empty_lebs may temporarily be higher by one -+ * because of the way we serialize LEB allocations and budgeting. See a -+ * comment in 'ubifs_find_free_space()'. 
-+ */ -+ lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - -+ c->lst.taken_empty_lebs; -+ if (unlikely(rsvd_idx_lebs > lebs)) { -+ dbg_budg("out of indexing space: min_idx_lebs %d (old %d), " -+ "rsvd_idx_lebs %d", min_idx_lebs, c->min_idx_lebs, -+ rsvd_idx_lebs); -+ return -ENOSPC; -+ } -+ -+ available = ubifs_calc_available(c, min_idx_lebs); -+ outstanding = c->budg_data_growth + c->budg_dd_growth; -+ -+ if (unlikely(available < outstanding)) { -+ dbg_budg("out of data space: available %lld, outstanding %lld", -+ available, outstanding); -+ return -ENOSPC; -+ } -+ -+ if (available - outstanding <= c->rp_size && !can_use_rp(c)) -+ return -ENOSPC; -+ -+ c->min_idx_lebs = min_idx_lebs; -+ return 0; -+} -+ -+/** -+ * calc_idx_growth - calculate approximate index growth from budgeting request. -+ * @c: UBIFS file-system description object -+ * @req: budgeting request -+ * -+ * For now we assume each new node adds one znode. But this is rather poor -+ * approximation, though. -+ */ -+static int calc_idx_growth(const struct ubifs_info *c, -+ const struct ubifs_budget_req *req) -+{ -+ int znodes; -+ -+ znodes = req->new_ino + (req->new_page << UBIFS_BLOCKS_PER_PAGE_SHIFT) + -+ req->new_dent; -+ return znodes * c->max_idx_node_sz; -+} -+ -+/** -+ * calc_data_growth - calculate approximate amount of new data from budgeting -+ * request. -+ * @c: UBIFS file-system description object -+ * @req: budgeting request -+ */ -+static int calc_data_growth(const struct ubifs_info *c, -+ const struct ubifs_budget_req *req) -+{ -+ int data_growth; -+ -+ data_growth = req->new_ino ? c->inode_budget : 0; -+ if (req->new_page) -+ data_growth += c->page_budget; -+ if (req->new_dent) -+ data_growth += c->dent_budget; -+ data_growth += req->new_ino_d; -+ return data_growth; -+} -+ -+/** -+ * calc_dd_growth - calculate approximate amount of data which makes other data -+ * dirty from budgeting request. 
-+ * @c: UBIFS file-system description object -+ * @req: budgeting request -+ */ -+static int calc_dd_growth(const struct ubifs_info *c, -+ const struct ubifs_budget_req *req) -+{ -+ int dd_growth; -+ -+ dd_growth = req->dirtied_page ? c->page_budget : 0; -+ -+ if (req->dirtied_ino) -+ dd_growth += c->inode_budget << (req->dirtied_ino - 1); -+ if (req->mod_dent) -+ dd_growth += c->dent_budget; -+ dd_growth += req->dirtied_ino_d; -+ return dd_growth; -+} -+ -+/** -+ * ubifs_budget_space - ensure there is enough space to complete an operation. -+ * @c: UBIFS file-system description object -+ * @req: budget request -+ * -+ * This function allocates budget for an operation. It uses pessimistic -+ * approximation of how much flash space the operation needs. The goal of this -+ * function is to make sure UBIFS always has flash space to flush all dirty -+ * pages, dirty inodes, and dirty znodes (liability). This function may force -+ * commit, garbage-collection or write-back. Returns zero in case of success, -+ * %-ENOSPC if there is no free space and other negative error codes in case of -+ * failures. 
-+ */ -+int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req) -+{ -+ int uninitialized_var(cmt_retries), uninitialized_var(wb_retries); -+ int err, idx_growth, data_growth, dd_growth, retried = 0; -+ -+ ubifs_assert(req->new_page <= 1); -+ ubifs_assert(req->dirtied_page <= 1); -+ ubifs_assert(req->new_dent <= 1); -+ ubifs_assert(req->mod_dent <= 1); -+ ubifs_assert(req->new_ino <= 1); -+ ubifs_assert(req->new_ino_d <= UBIFS_MAX_INO_DATA); -+ ubifs_assert(req->dirtied_ino <= 4); -+ ubifs_assert(req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4); -+ ubifs_assert(!(req->new_ino_d & 7)); -+ ubifs_assert(!(req->dirtied_ino_d & 7)); -+ -+ data_growth = calc_data_growth(c, req); -+ dd_growth = calc_dd_growth(c, req); -+ if (!data_growth && !dd_growth) -+ return 0; -+ idx_growth = calc_idx_growth(c, req); -+ -+again: -+ spin_lock(&c->space_lock); -+ ubifs_assert(c->budg_idx_growth >= 0); -+ ubifs_assert(c->budg_data_growth >= 0); -+ ubifs_assert(c->budg_dd_growth >= 0); -+ -+ if (unlikely(c->nospace) && (c->nospace_rp || !can_use_rp(c))) { -+ dbg_budg("no space"); -+ spin_unlock(&c->space_lock); -+ return -ENOSPC; -+ } -+ -+ c->budg_idx_growth += idx_growth; -+ c->budg_data_growth += data_growth; -+ c->budg_dd_growth += dd_growth; -+ -+ err = do_budget_space(c); -+ if (likely(!err)) { -+ req->idx_growth = idx_growth; -+ req->data_growth = data_growth; -+ req->dd_growth = dd_growth; -+ spin_unlock(&c->space_lock); -+ return 0; -+ } -+ -+ /* Restore the old values */ -+ c->budg_idx_growth -= idx_growth; -+ c->budg_data_growth -= data_growth; -+ c->budg_dd_growth -= dd_growth; -+ spin_unlock(&c->space_lock); -+ -+ if (req->fast) { -+ dbg_budg("no space for fast budgeting"); -+ return err; -+ } -+ -+ err = make_free_space(c); -+ cond_resched(); -+ if (err == -EAGAIN) { -+ dbg_budg("try again"); -+ goto again; -+ } else if (err == -ENOSPC) { -+ if (!retried) { -+ retried = 1; -+ dbg_budg("-ENOSPC, but anyway try once again"); -+ goto again; -+ } -+ dbg_budg("FS 
is full, -ENOSPC"); -+ c->nospace = 1; -+ if (can_use_rp(c) || c->rp_size == 0) -+ c->nospace_rp = 1; -+ smp_wmb(); -+ } else -+ ubifs_err("cannot budget space, error %d", err); -+ return err; -+} -+ -+/** -+ * ubifs_release_budget - release budgeted free space. -+ * @c: UBIFS file-system description object -+ * @req: budget request -+ * -+ * This function releases the space budgeted by 'ubifs_budget_space()'. Note, -+ * since the index changes (which were budgeted for in @req->idx_growth) will -+ * only be written to the media on commit, this function moves the index budget -+ * from @c->budg_idx_growth to @c->budg_uncommitted_idx. The latter will be -+ * zeroed by the commit operation. -+ */ -+void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req) -+{ -+ ubifs_assert(req->new_page <= 1); -+ ubifs_assert(req->dirtied_page <= 1); -+ ubifs_assert(req->new_dent <= 1); -+ ubifs_assert(req->mod_dent <= 1); -+ ubifs_assert(req->new_ino <= 1); -+ ubifs_assert(req->new_ino_d <= UBIFS_MAX_INO_DATA); -+ ubifs_assert(req->dirtied_ino <= 4); -+ ubifs_assert(req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4); -+ ubifs_assert(!(req->new_ino_d & 7)); -+ ubifs_assert(!(req->dirtied_ino_d & 7)); -+ if (!req->recalculate) { -+ ubifs_assert(req->idx_growth >= 0); -+ ubifs_assert(req->data_growth >= 0); -+ ubifs_assert(req->dd_growth >= 0); -+ } -+ -+ if (req->recalculate) { -+ req->data_growth = calc_data_growth(c, req); -+ req->dd_growth = calc_dd_growth(c, req); -+ req->idx_growth = calc_idx_growth(c, req); -+ } -+ -+ if (!req->data_growth && !req->dd_growth) -+ return; -+ -+ c->nospace = c->nospace_rp = 0; -+ smp_wmb(); -+ -+ spin_lock(&c->space_lock); -+ c->budg_idx_growth -= req->idx_growth; -+ c->budg_uncommitted_idx += req->idx_growth; -+ c->budg_data_growth -= req->data_growth; -+ c->budg_dd_growth -= req->dd_growth; -+ c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); -+ -+ ubifs_assert(c->budg_idx_growth >= 0); -+ ubifs_assert(c->budg_data_growth >= 0); -+ 
ubifs_assert(c->budg_dd_growth >= 0); -+ ubifs_assert(c->min_idx_lebs < c->main_lebs); -+ ubifs_assert(!(c->budg_idx_growth & 7)); -+ ubifs_assert(!(c->budg_data_growth & 7)); -+ ubifs_assert(!(c->budg_dd_growth & 7)); -+ spin_unlock(&c->space_lock); -+} -+ -+/** -+ * ubifs_convert_page_budget - convert budget of a new page. -+ * @c: UBIFS file-system description object -+ * -+ * This function converts budget which was allocated for a new page of data to -+ * the budget of changing an existing page of data. The latter is smaller than -+ * the former, so this function only does simple re-calculation and does not -+ * involve any write-back. -+ */ -+void ubifs_convert_page_budget(struct ubifs_info *c) -+{ -+ spin_lock(&c->space_lock); -+ /* Release the index growth reservation */ -+ c->budg_idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT; -+ /* Release the data growth reservation */ -+ c->budg_data_growth -= c->page_budget; -+ /* Increase the dirty data growth reservation instead */ -+ c->budg_dd_growth += c->page_budget; -+ /* And re-calculate the indexing space reservation */ -+ c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); -+ spin_unlock(&c->space_lock); -+} -+ -+/** -+ * ubifs_release_dirty_inode_budget - release dirty inode budget. -+ * @c: UBIFS file-system description object -+ * @ui: UBIFS inode to release the budget for -+ * -+ * This function releases budget corresponding to a dirty inode. It is usually -+ * called after the inode has been written to the media and marked as -+ * clean. -+ */ -+void ubifs_release_dirty_inode_budget(struct ubifs_info *c, -+ struct ubifs_inode *ui) -+{ -+ struct ubifs_budget_req req; -+ -+ memset(&req, 0, sizeof(struct ubifs_budget_req)); -+ req.dd_growth = c->inode_budget + ALIGN(ui->data_len, 8); -+ ubifs_release_budget(c, &req); -+} -+ -+/** -+ * ubifs_reported_space - calculate reported free space. 
-+ * @c: the UBIFS file-system description object -+ * @free: amount of free space -+ * -+ * This function calculates amount of free space which will be reported to -+ * user-space. User-space applications tend to expect that if the file-system -+ * (e.g., via the 'statfs()' call) reports that it has N bytes available, they -+ * are able to write a file of size N. UBIFS attaches node headers to each data -+ * node and it has to write indexing nodes as well. This introduces additional -+ * overhead, and UBIFS has to report slightly less free space to meet the above -+ * expectations. -+ * -+ * This function assumes free space is made up of uncompressed data nodes and -+ * full index nodes (one per data node, tripled because we always allow enough -+ * space to write the index thrice). -+ * -+ * Note, the calculation is pessimistic, which means that most of the time -+ * UBIFS reports less space than it actually has. -+ */ -+long long ubifs_reported_space(const struct ubifs_info *c, long long free) -+{ -+ int divisor, factor, f; -+ -+ /* -+ * Reported space size is @free * X, where X is UBIFS block size -+ * divided by UBIFS block size + all overhead one data block -+ * introduces. The overhead is the node header + indexing overhead. -+ * -+ * Indexing overhead calculations are based on the following formula: -+ * I = N/(f - 1) + 1, where I - number of indexing nodes, N - number -+ * of data nodes, f - fanout. Because the effective UBIFS fanout is -+ * half the maximum fanout, we assume that each data node -+ * introduces 3 * @c->max_idx_node_sz / (@c->fanout/2 - 1) bytes. -+ * Note, the multiplier 3 is because UBIFS reserves three times as much space -+ * for the index. -+ */ -+ f = c->fanout > 3 ? c->fanout >> 1 : 2; -+ factor = UBIFS_BLOCK_SIZE; -+ divisor = UBIFS_MAX_DATA_NODE_SZ; -+ divisor += (c->max_idx_node_sz * 3) / (f - 1); -+ free *= factor; -+ return div_u64(free, divisor); -+} -+ -+/** -+ * ubifs_get_free_space_nolock - return amount of free space. 
-+ * @c: UBIFS file-system description object -+ * -+ * This function calculates amount of free space to report to user-space. -+ * -+ * Because UBIFS may introduce substantial overhead (the index, node headers, -+ * alignment, wastage at the end of eraseblocks, etc), it cannot report real -+ * amount of free flash space it has (well, because not all dirty space is -+ * reclaimable, UBIFS does not actually know the real amount). If UBIFS did so, -+ * it would break user expectations about what free space is. Users seem to -+ * be accustomed to assume that if the file-system reports N bytes of free space, -+ * they would be able to fit a file of N bytes to the FS. This almost works for -+ * traditional file-systems, because they have way less overhead than UBIFS. -+ * So, to keep users happy, UBIFS tries to take the overhead into account. -+ */ -+long long ubifs_get_free_space_nolock(struct ubifs_info *c) -+{ -+ int rsvd_idx_lebs, lebs; -+ long long available, outstanding, free; -+ -+ ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c)); -+ outstanding = c->budg_data_growth + c->budg_dd_growth; -+ available = ubifs_calc_available(c, c->min_idx_lebs); -+ -+ /* -+ * When reporting free space to user-space, UBIFS guarantees that it is -+ * possible to write a file of free space size. This means that for -+ * empty LEBs we may use more precise calculations than -+ * 'ubifs_calc_available()' is using. Namely, we know that in empty -+ * LEBs we would waste only @c->leb_overhead bytes, not @c->dark_wm. -+ * Thus, amend the available space. -+ * -+ * Note, the calculations below are similar to what we have in -+ * 'do_budget_space()', so refer there for comments. 
-+ */ -+ if (c->min_idx_lebs > c->lst.idx_lebs) -+ rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; -+ else -+ rsvd_idx_lebs = 0; -+ lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - -+ c->lst.taken_empty_lebs; -+ lebs -= rsvd_idx_lebs; -+ available += lebs * (c->dark_wm - c->leb_overhead); -+ -+ if (available > outstanding) -+ free = ubifs_reported_space(c, available - outstanding); -+ else -+ free = 0; -+ return free; -+} -+ -+/** -+ * ubifs_get_free_space - return amount of free space. -+ * @c: UBIFS file-system description object -+ * -+ * This function calculates and returns amount of free space to report to -+ * user-space. -+ */ -+long long ubifs_get_free_space(struct ubifs_info *c) -+{ -+ long long free; -+ -+ spin_lock(&c->space_lock); -+ free = ubifs_get_free_space_nolock(c); -+ spin_unlock(&c->space_lock); -+ -+ return free; -+} -diff -Nurd linux-2.6.24.orig/fs/ubifs/commit.c linux-2.6.24/fs/ubifs/commit.c ---- linux-2.6.24.orig/fs/ubifs/commit.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.24/fs/ubifs/commit.c 2009-04-17 09:49:28.000000000 +0200 -@@ -0,0 +1,679 @@ -+/* -+ * This file is part of UBIFS. -+ * -+ * Copyright (C) 2006-2008 Nokia Corporation. -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 as published by -+ * the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -+ * more details. 
-+ * -+ * You should have received a copy of the GNU General Public License along with -+ * this program; if not, write to the Free Software Foundation, Inc., 51 -+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ * -+ * Authors: Adrian Hunter -+ * Artem Bityutskiy (Битюцкий Артём) -+ */ -+ -+/* -+ * This file implements functions that manage the running of the commit process. -+ * Each affected module has its own functions to accomplish their part in the -+ * commit and those functions are called here. -+ * -+ * The commit is the process whereby all updates to the index and LEB properties -+ * are written out together and the journal becomes empty. This keeps the -+ * file system consistent - at all times the state can be recreated by reading -+ * the index and LEB properties and then replaying the journal. -+ * -+ * The commit is split into two parts named "commit start" and "commit end". -+ * During commit start, the commit process has exclusive access to the journal -+ * by holding the commit semaphore down for writing. As few I/O operations as -+ * possible are performed during commit start, instead the nodes that are to be -+ * written are merely identified. During commit end, the commit semaphore is no -+ * longer held and the journal is again in operation, allowing users to continue -+ * to use the file system while the bulk of the commit I/O is performed. The -+ * purpose of this two-step approach is to prevent the commit from causing any -+ * latency blips. Note that in any case, the commit does not prevent lookups -+ * (as permitted by the TNC mutex), or access to VFS data structures e.g. page -+ * cache. -+ */ -+ -+#include <linux/freezer.h> -+#include <linux/kthread.h> -+#include "ubifs.h" -+ -+/** -+ * do_commit - commit the journal. -+ * @c: UBIFS file-system description object -+ * -+ * This function implements UBIFS commit. It has to be called with commit lock -+ * locked. 
Returns zero in case of success and a negative error code in case of -+ * failure. -+ */ -+static int do_commit(struct ubifs_info *c) -+{ -+ int err, new_ltail_lnum, old_ltail_lnum, i; -+ struct ubifs_zbranch zroot; -+ struct ubifs_lp_stats lst; -+ -+ dbg_cmt("start"); -+ if (c->ro_media) { -+ err = -EROFS; -+ goto out_up; -+ } -+ -+ /* Sync all write buffers (necessary for recovery) */ -+ for (i = 0; i < c->jhead_cnt; i++) { -+ err = ubifs_wbuf_sync(&c->jheads[i].wbuf); -+ if (err) -+ goto out_up; -+ } -+ -+ c->cmt_no += 1; -+ err = ubifs_gc_start_commit(c); -+ if (err) -+ goto out_up; -+ err = dbg_check_lprops(c); -+ if (err) -+ goto out_up; -+ err = ubifs_log_start_commit(c, &new_ltail_lnum); -+ if (err) -+ goto out_up; -+ err = ubifs_tnc_start_commit(c, &zroot); -+ if (err) -+ goto out_up; -+ err = ubifs_lpt_start_commit(c); -+ if (err) -+ goto out_up; -+ err = ubifs_orphan_start_commit(c); -+ if (err) -+ goto out_up; -+ -+ ubifs_get_lp_stats(c, &lst); -+ -+ up_write(&c->commit_sem); -+ -+ err = ubifs_tnc_end_commit(c); -+ if (err) -+ goto out; -+ err = ubifs_lpt_end_commit(c); -+ if (err) -+ goto out; -+ err = ubifs_orphan_end_commit(c); -+ if (err) -+ goto out; -+ old_ltail_lnum = c->ltail_lnum; -+ err = ubifs_log_end_commit(c, new_ltail_lnum); -+ if (err) -+ goto out; -+ err = dbg_check_old_index(c, &zroot); -+ if (err) -+ goto out; -+ -+ mutex_lock(&c->mst_mutex); -+ c->mst_node->cmt_no = cpu_to_le64(c->cmt_no); -+ c->mst_node->log_lnum = cpu_to_le32(new_ltail_lnum); -+ c->mst_node->root_lnum = cpu_to_le32(zroot.lnum); -+ c->mst_node->root_offs = cpu_to_le32(zroot.offs); -+ c->mst_node->root_len = cpu_to_le32(zroot.len); -+ c->mst_node->ihead_lnum = cpu_to_le32(c->ihead_lnum); -+ c->mst_node->ihead_offs = cpu_to_le32(c->ihead_offs); -+ c->mst_node->index_size = cpu_to_le64(c->old_idx_sz); -+ c->mst_node->lpt_lnum = cpu_to_le32(c->lpt_lnum); -+ c->mst_node->lpt_offs = cpu_to_le32(c->lpt_offs); -+ c->mst_node->nhead_lnum = cpu_to_le32(c->nhead_lnum); -+ 
c->mst_node->nhead_offs = cpu_to_le32(c->nhead_offs); -+ c->mst_node->ltab_lnum = cpu_to_le32(c->ltab_lnum); -+ c->mst_node->ltab_offs = cpu_to_le32(c->ltab_offs); -+ c->mst_node->lsave_lnum = cpu_to_le32(c->lsave_lnum); -+ c->mst_node->lsave_offs = cpu_to_le32(c->lsave_offs); -+ c->mst_node->lscan_lnum = cpu_to_le32(c->lscan_lnum); -+ c->mst_node->empty_lebs = cpu_to_le32(lst.empty_lebs); -+ c->mst_node->idx_lebs = cpu_to_le32(lst.idx_lebs); -+ c->mst_node->total_free = cpu_to_le64(lst.total_free); -+ c->mst_node->total_dirty = cpu_to_le64(lst.total_dirty); -+ c->mst_node->total_used = cpu_to_le64(lst.total_used); -+ c->mst_node->total_dead = cpu_to_le64(lst.total_dead); -+ c->mst_node->total_dark = cpu_to_le64(lst.total_dark); -+ if (c->no_orphs) -+ c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS); -+ else -+ c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_NO_ORPHS); -+ err = ubifs_write_master(c); -+ mutex_unlock(&c->mst_mutex); -+ if (err) -+ goto out; -+ -+ err = ubifs_log_post_commit(c, old_ltail_lnum); -+ if (err) -+ goto out; -+ err = ubifs_gc_end_commit(c); -+ if (err) -+ goto out; -+ err = ubifs_lpt_post_commit(c); -+ if (err) -+ goto out; -+ -+ spin_lock(&c->cs_lock); -+ c->cmt_state = COMMIT_RESTING; -+ wake_up(&c->cmt_wq); -+ dbg_cmt("commit end"); -+ spin_unlock(&c->cs_lock); -+ -+ return 0; -+ -+out_up: -+ up_write(&c->commit_sem); -+out: -+ ubifs_err("commit failed, error %d", err); -+ spin_lock(&c->cs_lock); -+ c->cmt_state = COMMIT_BROKEN; -+ wake_up(&c->cmt_wq); -+ spin_unlock(&c->cs_lock); -+ ubifs_ro_mode(c, err); -+ return err; -+} -+ -+/** -+ * run_bg_commit - run background commit if it is needed. -+ * @c: UBIFS file-system description object -+ * -+ * This function runs background commit if it is needed. Returns zero in case -+ * of success and a negative error code in case of failure. 
-+ */ -+static int run_bg_commit(struct ubifs_info *c) -+{ -+ spin_lock(&c->cs_lock); -+ /* -+ * Run background commit only if background commit was requested or if -+ * commit is required. -+ */ -+ if (c->cmt_state != COMMIT_BACKGROUND && -+ c->cmt_state != COMMIT_REQUIRED) -+ goto out; -+ spin_unlock(&c->cs_lock); -+ -+ down_write(&c->commit_sem); -+ spin_lock(&c->cs_lock); -+ if (c->cmt_state == COMMIT_REQUIRED) -+ c->cmt_state = COMMIT_RUNNING_REQUIRED; -+ else if (c->cmt_state == COMMIT_BACKGROUND) -+ c->cmt_state = COMMIT_RUNNING_BACKGROUND; -+ else -+ goto out_cmt_unlock; -+ spin_unlock(&c->cs_lock); -+ -+ return do_commit(c); -+ -+out_cmt_unlock: -+ up_write(&c->commit_sem); -+out: -+ spin_unlock(&c->cs_lock); -+ return 0; -+} -+ -+/** -+ * ubifs_bg_thread - UBIFS background thread function. -+ * @info: points to the file-system description object -+ * -+ * This function implements various file-system background activities: -+ * o when a write-buffer timer expires it synchronizes the appropriate -+ * write-buffer; -+ * o when the journal is about to be full, it starts in-advance commit. -+ * -+ * Note, other stuff like background garbage collection may be added here in -+ * future. -+ */ -+int ubifs_bg_thread(void *info) -+{ -+ int err; -+ struct ubifs_info *c = info; -+ -+ dbg_msg("background thread \"%s\" started, PID %d", -+ c->bgt_name, current->pid); -+ set_freezable(); -+ -+ while (1) { -+ if (kthread_should_stop()) -+ break; -+ -+ if (try_to_freeze()) -+ continue; -+ -+ set_current_state(TASK_INTERRUPTIBLE); -+ /* Check if there is something to do */ -+ if (!c->need_bgt) { -+ /* -+ * Nothing prevents us from going sleep now and -+ * be never woken up and block the task which -+ * could wait in 'kthread_stop()' forever. 
-+ */ -+ if (kthread_should_stop()) -+ break; -+ schedule(); -+ continue; -+ } else -+ __set_current_state(TASK_RUNNING); -+ -+ c->need_bgt = 0; -+ err = ubifs_bg_wbufs_sync(c); -+ if (err) -+ ubifs_ro_mode(c, err); -+ -+ run_bg_commit(c); -+ cond_resched(); -+ } -+ -+ dbg_msg("background thread \"%s\" stops", c->bgt_name); -+ return 0; -+} -+ -+/** -+ * ubifs_commit_required - set commit state to "required". -+ * @c: UBIFS file-system description object -+ * -+ * This function is called if a commit is required but cannot be done from the -+ * calling function, so it is just flagged instead. -+ */ -+void ubifs_commit_required(struct ubifs_info *c) -+{ -+ spin_lock(&c->cs_lock); -+ switch (c->cmt_state) { -+ case COMMIT_RESTING: -+ case COMMIT_BACKGROUND: -+ dbg_cmt("old: %s, new: %s", dbg_cstate(c->cmt_state), -+ dbg_cstate(COMMIT_REQUIRED)); -+ c->cmt_state = COMMIT_REQUIRED; -+ break; -+ case COMMIT_RUNNING_BACKGROUND: -+ dbg_cmt("old: %s, new: %s", dbg_cstate(c->cmt_state), -+ dbg_cstate(COMMIT_RUNNING_REQUIRED)); -+ c->cmt_state = COMMIT_RUNNING_REQUIRED; -+ break; -+ case COMMIT_REQUIRED: -+ case COMMIT_RUNNING_REQUIRED: -+ case COMMIT_BROKEN: -+ break; -+ } -+ spin_unlock(&c->cs_lock); -+} -+ -+/** -+ * ubifs_request_bg_commit - notify the background thread to do a commit. -+ * @c: UBIFS file-system description object -+ * -+ * This function is called if the journal is full enough to make a commit -+ * worthwhile, so background thread is kicked to start it. -+ */ -+void ubifs_request_bg_commit(struct ubifs_info *c) -+{ -+ spin_lock(&c->cs_lock); -+ if (c->cmt_state == COMMIT_RESTING) { -+ dbg_cmt("old: %s, new: %s", dbg_cstate(c->cmt_state), -+ dbg_cstate(COMMIT_BACKGROUND)); -+ c->cmt_state = COMMIT_BACKGROUND; -+ spin_unlock(&c->cs_lock); -+ ubifs_wake_up_bgt(c); -+ } else -+ spin_unlock(&c->cs_lock); -+} -+ -+/** -+ * wait_for_commit - wait for commit. 
-+ * @c: UBIFS file-system description object -+ * -+ * This function sleeps until the commit operation is no longer running. -+ */ -+static int wait_for_commit(struct ubifs_info *c) -+{ -+ dbg_cmt("pid %d goes sleep", current->pid); -+ -+ /* -+ * The following sleeps if the condition is false, and will be woken -+ * when the commit ends. It is possible, although very unlikely, that we -+ * will wake up and see the subsequent commit running, rather than the -+ * one we were waiting for, and go back to sleep. However, we will be -+ * woken again, so there is no danger of sleeping forever. -+ */ -+ wait_event(c->cmt_wq, c->cmt_state != COMMIT_RUNNING_BACKGROUND && -+ c->cmt_state != COMMIT_RUNNING_REQUIRED); -+ dbg_cmt("commit finished, pid %d woke up", current->pid); -+ return 0; -+} -+ -+/** -+ * ubifs_run_commit - run or wait for commit. -+ * @c: UBIFS file-system description object -+ * -+ * This function runs commit and returns zero in case of success and a negative -+ * error code in case of failure. -+ */ -+int ubifs_run_commit(struct ubifs_info *c) -+{ -+ int err = 0; -+ -+ spin_lock(&c->cs_lock); -+ if (c->cmt_state == COMMIT_BROKEN) { -+ err = -EINVAL; -+ goto out; -+ } -+ -+ if (c->cmt_state == COMMIT_RUNNING_BACKGROUND) -+ /* -+ * We set the commit state to 'running required' to indicate -+ * that we want it to complete as quickly as possible. -+ */ -+ c->cmt_state = COMMIT_RUNNING_REQUIRED; -+ -+ if (c->cmt_state == COMMIT_RUNNING_REQUIRED) { -+ spin_unlock(&c->cs_lock); -+ return wait_for_commit(c); -+ } -+ spin_unlock(&c->cs_lock); -+ -+ /* Ok, the commit is indeed needed */ -+ -+ down_write(&c->commit_sem); -+ spin_lock(&c->cs_lock); -+ /* -+ * Since we unlocked 'c->cs_lock', the state may have changed, so -+ * re-check it. 
-+ */ -+ if (c->cmt_state == COMMIT_BROKEN) { -+ err = -EINVAL; -+ goto out_cmt_unlock; -+ } -+ -+ if (c->cmt_state == COMMIT_RUNNING_BACKGROUND) -+ c->cmt_state = COMMIT_RUNNING_REQUIRED; -+ -+ if (c->cmt_state == COMMIT_RUNNING_REQUIRED) { -+ up_write(&c->commit_sem); -+ spin_unlock(&c->cs_lock); -+ return wait_for_commit(c); -+ } -+ c->cmt_state = COMMIT_RUNNING_REQUIRED; -+ spin_unlock(&c->cs_lock); -+ -+ err = do_commit(c); -+ return err; -+ -+out_cmt_unlock: -+ up_write(&c->commit_sem); -+out: -+ spin_unlock(&c->cs_lock); -+ return err; -+} -+ -+/** -+ * ubifs_gc_should_commit - determine if it is time for GC to run commit. -+ * @c: UBIFS file-system description object -+ * -+ * This function is called by garbage collection to determine if commit should -+ * be run. If commit state is @COMMIT_BACKGROUND, which means that the journal -+ * is full enough to start commit, this function returns true. It is not -+ * absolutely necessary to commit yet, but it feels like this should be better -+ * than to keep doing GC. This function returns %1 if GC has to initiate commit -+ * and %0 if not. -+ */ -+int ubifs_gc_should_commit(struct ubifs_info *c) -+{ -+ int ret = 0; -+ -+ spin_lock(&c->cs_lock); -+ if (c->cmt_state == COMMIT_BACKGROUND) { -+ dbg_cmt("commit required now"); -+ c->cmt_state = COMMIT_REQUIRED; -+ } else -+ dbg_cmt("commit not requested"); -+ if (c->cmt_state == COMMIT_REQUIRED) -+ ret = 1; -+ spin_unlock(&c->cs_lock); -+ return ret; -+} -+ -+#ifdef CONFIG_UBIFS_FS_DEBUG -+ -+/** -+ * struct idx_node - hold index nodes during index tree traversal. 
-+ * @list: list -+ * @iip: index in parent (slot number of this indexing node in the parent -+ * indexing node) -+ * @upper_key: all keys in this indexing node have to be less or equivalent to -+ * this key -+ * @idx: index node (8-byte aligned because all node structures must be 8-byte -+ * aligned) -+ */ -+struct idx_node { -+ struct list_head list; -+ int iip; -+ union ubifs_key upper_key; -+ struct ubifs_idx_node idx __attribute__((aligned(8))); -+}; -+ -+/** -+ * dbg_old_index_check_init - get information for the next old index check. -+ * @c: UBIFS file-system description object -+ * @zroot: root of the index -+ * -+ * This function records information about the index that will be needed for the -+ * next old index check i.e. 'dbg_check_old_index()'. -+ * -+ * This function returns %0 on success and a negative error code on failure. -+ */ -+int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot) -+{ -+ struct ubifs_idx_node *idx; -+ int lnum, offs, len, err = 0; -+ struct ubifs_debug_info *d = c->dbg; -+ -+ d->old_zroot = *zroot; -+ lnum = d->old_zroot.lnum; -+ offs = d->old_zroot.offs; -+ len = d->old_zroot.len; -+ -+ idx = kmalloc(c->max_idx_node_sz, GFP_NOFS); -+ if (!idx) -+ return -ENOMEM; -+ -+ err = ubifs_read_node(c, idx, UBIFS_IDX_NODE, len, lnum, offs); -+ if (err) -+ goto out; -+ -+ d->old_zroot_level = le16_to_cpu(idx->level); -+ d->old_zroot_sqnum = le64_to_cpu(idx->ch.sqnum); -+out: -+ kfree(idx); -+ return err; -+} -+ -+/** -+ * dbg_check_old_index - check the old copy of the index. -+ * @c: UBIFS file-system description object -+ * @zroot: root of the new index -+ * -+ * In order to be able to recover from an unclean unmount, a complete copy of -+ * the index must exist on flash. This is the "old" index. The commit process -+ * must write the "new" index to flash without overwriting or destroying any -+ * part of the old index. 
This function is run at commit end in order to check -+ * that the old index does indeed exist completely intact. -+ * -+ * This function returns %0 on success and a negative error code on failure. -+ */ -+int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot) -+{ -+ int lnum, offs, len, err = 0, uninitialized_var(last_level), child_cnt; -+ int first = 1, iip; -+ struct ubifs_debug_info *d = c->dbg; -+ union ubifs_key lower_key, upper_key, l_key, u_key; -+ unsigned long long uninitialized_var(last_sqnum); -+ struct ubifs_idx_node *idx; -+ struct list_head list; -+ struct idx_node *i; -+ size_t sz; -+ -+ if (!(ubifs_chk_flags & UBIFS_CHK_OLD_IDX)) -+ goto out; -+ -+ INIT_LIST_HEAD(&list); -+ -+ sz = sizeof(struct idx_node) + ubifs_idx_node_sz(c, c->fanout) - -+ UBIFS_IDX_NODE_SZ; -+ -+ /* Start at the old zroot */ -+ lnum = d->old_zroot.lnum; -+ offs = d->old_zroot.offs; -+ len = d->old_zroot.len; -+ iip = 0; -+ -+ /* -+ * Traverse the index tree preorder depth-first i.e. do a node and then -+ * its subtrees from left to right. 
-+ */ -+ while (1) { -+ struct ubifs_branch *br; -+ -+ /* Get the next index node */ -+ i = kmalloc(sz, GFP_NOFS); -+ if (!i) { -+ err = -ENOMEM; -+ goto out_free; -+ } -+ i->iip = iip; -+ /* Keep the index nodes on our path in a linked list */ -+ list_add_tail(&i->list, &list); -+ /* Read the index node */ -+ idx = &i->idx; -+ err = ubifs_read_node(c, idx, UBIFS_IDX_NODE, len, lnum, offs); -+ if (err) -+ goto out_free; -+ /* Validate index node */ -+ child_cnt = le16_to_cpu(idx->child_cnt); -+ if (child_cnt < 1 || child_cnt > c->fanout) { -+ err = 1; -+ goto out_dump; -+ } -+ if (first) { -+ first = 0; -+ /* Check root level and sqnum */ -+ if (le16_to_cpu(idx->level) != d->old_zroot_level) { -+ err = 2; -+ goto out_dump; -+ } -+ if (le64_to_cpu(idx->ch.sqnum) != d->old_zroot_sqnum) { -+ err = 3; -+ goto out_dump; -+ } -+ /* Set last values as though root had a parent */ -+ last_level = le16_to_cpu(idx->level) + 1; -+ last_sqnum = le64_to_cpu(idx->ch.sqnum) + 1; -+ key_read(c, ubifs_idx_key(c, idx), &lower_key); -+ highest_ino_key(c, &upper_key, INUM_WATERMARK); -+ } -+ key_copy(c, &upper_key, &i->upper_key); -+ if (le16_to_cpu(idx->level) != last_level - 1) { -+ err = 3; -+ goto out_dump; -+ } -+ /* -+ * The index is always written bottom up hence a child's sqnum -+ * is always less than the parents. 
-+ */ -+ if (le64_to_cpu(idx->ch.sqnum) >= last_sqnum) { -+ err = 4; -+ goto out_dump; -+ } -+ /* Check key range */ -+ key_read(c, ubifs_idx_key(c, idx), &l_key); -+ br = ubifs_idx_branch(c, idx, child_cnt - 1); -+ key_read(c, &br->key, &u_key); -+ if (keys_cmp(c, &lower_key, &l_key) > 0) { -+ err = 5; -+ goto out_dump; -+ } -+ if (keys_cmp(c, &upper_key, &u_key) < 0) { -+ err = 6; -+ goto out_dump; -+ } -+ if (keys_cmp(c, &upper_key, &u_key) == 0) -+ if (!is_hash_key(c, &u_key)) { -+ err = 7; -+ goto out_dump; -+ } -+ /* Go to next index node */ -+ if (le16_to_cpu(idx->level) == 0) { -+ /* At the bottom, so go up until can go right */ -+ while (1) { -+ /* Drop the bottom of the list */ -+ list_del(&i->list); -+ kfree(i); -+ /* No more list means we are done */ -+ if (list_empty(&list)) -+ goto out; -+ /* Look at the new bottom */ -+ i = list_entry(list.prev, struct idx_node, -+ list); -+ idx = &i->idx; -+ /* Can we go right */ -+ if (iip + 1 < le16_to_cpu(idx->child_cnt)) { -+ iip = iip + 1; -+ break; -+ } else -+ /* Nope, so go up again */ -+ iip = i->iip; -+ } -+ } else -+ /* Go down left */ -+ iip = 0; -+ /* -+ * We have the parent in 'idx' and now we set up for reading the -+ * child pointed to by slot 'iip'. 
-+ */ -+ last_level = le16_to_cpu(idx->level); -+ last_sqnum = le64_to_cpu(idx->ch.sqnum); -+ br = ubifs_idx_branch(c, idx, iip); -+ lnum = le32_to_cpu(br->lnum); -+ offs = le32_to_cpu(br->offs); -+ len = le32_to_cpu(br->len); -+ key_read(c, &br->key, &lower_key); -+ if (iip + 1 < le16_to_cpu(idx->child_cnt)) { -+ br = ubifs_idx_branch(c, idx, iip + 1); -+ key_read(c, &br->key, &upper_key); -+ } else -+ key_copy(c, &i->upper_key, &upper_key); -+ } -+out: -+ err = dbg_old_index_check_init(c, zroot); -+ if (err) -+ goto out_free; -+ -+ return 0; -+ -+out_dump: -+ dbg_err("dumping index node (iip=%d)", i->iip); -+ dbg_dump_node(c, idx); -+ list_del(&i->list); -+ kfree(i); -+ if (!list_empty(&list)) { -+ i = list_entry(list.prev, struct idx_node, list); -+ dbg_err("dumping parent index node"); -+ dbg_dump_node(c, &i->idx); -+ } -+out_free: -+ while (!list_empty(&list)) { -+ i = list_entry(list.next, struct idx_node, list); -+ list_del(&i->list); -+ kfree(i); -+ } -+ ubifs_err("failed, error %d", err); -+ if (err > 0) -+ err = -EINVAL; -+ return err; -+} -+ -+#endif /* CONFIG_UBIFS_FS_DEBUG */ -diff -Nurd linux-2.6.24.orig/fs/ubifs/compress.c linux-2.6.24/fs/ubifs/compress.c ---- linux-2.6.24.orig/fs/ubifs/compress.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.24/fs/ubifs/compress.c 2009-04-17 09:49:28.000000000 +0200 -@@ -0,0 +1,251 @@ -+/* -+ * This file is part of UBIFS. -+ * -+ * Copyright (C) 2006-2008 Nokia Corporation. -+ * Copyright (C) 2006, 2007 University of Szeged, Hungary -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 as published by -+ * the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -+ * more details. 
-+ * -+ * You should have received a copy of the GNU General Public License along with -+ * this program; if not, write to the Free Software Foundation, Inc., 51 -+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ * -+ * Authors: Adrian Hunter -+ * Artem Bityutskiy (Битюцкий Артём) -+ * Zoltan Sogor -+ */ -+ -+/* -+ * This file provides a single place to access to compression and -+ * decompression. -+ */ -+ -+#include <linux/crypto.h> -+#include "ubifs.h" -+ -+/* Fake description object for the "none" compressor */ -+static struct ubifs_compressor none_compr = { -+ .compr_type = UBIFS_COMPR_NONE, -+ .name = "none", -+ .capi_name = "", -+}; -+ -+#ifdef CONFIG_UBIFS_FS_LZO -+static DEFINE_MUTEX(lzo_mutex); -+ -+static struct ubifs_compressor lzo_compr = { -+ .compr_type = UBIFS_COMPR_LZO, -+ .comp_mutex = &lzo_mutex, -+ .name = "lzo", -+ .capi_name = "lzo", -+}; -+#else -+static struct ubifs_compressor lzo_compr = { -+ .compr_type = UBIFS_COMPR_LZO, -+ .name = "lzo", -+}; -+#endif -+ -+#ifdef CONFIG_UBIFS_FS_ZLIB -+static DEFINE_MUTEX(deflate_mutex); -+static DEFINE_MUTEX(inflate_mutex); -+ -+static struct ubifs_compressor zlib_compr = { -+ .compr_type = UBIFS_COMPR_ZLIB, -+ .comp_mutex = &deflate_mutex, -+ .decomp_mutex = &inflate_mutex, -+ .name = "zlib", -+ .capi_name = "deflate", -+}; -+#else -+static struct ubifs_compressor zlib_compr = { -+ .compr_type = UBIFS_COMPR_ZLIB, -+ .name = "zlib", -+}; -+#endif -+ -+/* All UBIFS compressors */ -+struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT]; -+ -+/** -+ * ubifs_compress - compress data. 
-+ * @in_buf: data to compress -+ * @in_len: length of the data to compress -+ * @out_buf: output buffer where compressed data should be stored -+ * @out_len: output buffer length is returned here -+ * @compr_type: type of compression to use on enter, actually used compression -+ * type on exit -+ * -+ * This function compresses input buffer @in_buf of length @in_len and stores -+ * the result in the output buffer @out_buf and the resulting length in -+ * @out_len. If the input buffer does not compress, it is just copied to the -+ * @out_buf. The same happens if @compr_type is %UBIFS_COMPR_NONE or if -+ * compression error occurred. -+ * -+ * Note, if the input buffer was not compressed, it is copied to the output -+ * buffer and %UBIFS_COMPR_NONE is returned in @compr_type. -+ */ -+void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len, -+ int *compr_type) -+{ -+ int err; -+ struct ubifs_compressor *compr = ubifs_compressors[*compr_type]; -+ -+ if (*compr_type == UBIFS_COMPR_NONE) -+ goto no_compr; -+ -+ /* If the input data is small, do not even try to compress it */ -+ if (in_len < UBIFS_MIN_COMPR_LEN) -+ goto no_compr; -+ -+ if (compr->comp_mutex) -+ mutex_lock(compr->comp_mutex); -+ err = crypto_comp_compress(compr->cc, in_buf, in_len, out_buf, -+ (unsigned int *)out_len); -+ if (compr->comp_mutex) -+ mutex_unlock(compr->comp_mutex); -+ if (unlikely(err)) { -+ ubifs_warn("cannot compress %d bytes, compressor %s, " -+ "error %d, leave data uncompressed", -+ in_len, compr->name, err); -+ goto no_compr; -+ } -+ -+ /* -+ * If the data compressed only slightly, it is better to leave it -+ * uncompressed to improve read speed. -+ */ -+ if (in_len - *out_len < UBIFS_MIN_COMPRESS_DIFF) -+ goto no_compr; -+ -+ return; -+ -+no_compr: -+ memcpy(out_buf, in_buf, in_len); -+ *out_len = in_len; -+ *compr_type = UBIFS_COMPR_NONE; -+} -+ -+/** -+ * ubifs_decompress - decompress data. 
-+ * @in_buf: data to decompress -+ * @in_len: length of the data to decompress -+ * @out_buf: output buffer where decompressed data should be stored -+ * @out_len: output length is returned here -+ * @compr_type: type of compression -+ * -+ * This function decompresses data from buffer @in_buf into buffer @out_buf. -+ * The length of the uncompressed data is returned in @out_len. This function -+ * returns %0 on success or a negative error code on failure. -+ */ -+int ubifs_decompress(const void *in_buf, int in_len, void *out_buf, -+ int *out_len, int compr_type) -+{ -+ int err; -+ struct ubifs_compressor *compr; -+ -+ if (unlikely(compr_type < 0 || compr_type >= UBIFS_COMPR_TYPES_CNT)) { -+ ubifs_err("invalid compression type %d", compr_type); -+ return -EINVAL; -+ } -+ -+ compr = ubifs_compressors[compr_type]; -+ -+ if (unlikely(!compr->capi_name)) { -+ ubifs_err("%s compression is not compiled in", compr->name); -+ return -EINVAL; -+ } -+ -+ if (compr_type == UBIFS_COMPR_NONE) { -+ memcpy(out_buf, in_buf, in_len); -+ *out_len = in_len; -+ return 0; -+ } -+ -+ if (compr->decomp_mutex) -+ mutex_lock(compr->decomp_mutex); -+ err = crypto_comp_decompress(compr->cc, in_buf, in_len, out_buf, -+ (unsigned int *)out_len); -+ if (compr->decomp_mutex) -+ mutex_unlock(compr->decomp_mutex); -+ if (err) -+ ubifs_err("cannot decompress %d bytes, compressor %s, " -+ "error %d", in_len, compr->name, err); -+ -+ return err; -+} -+ -+/** -+ * compr_init - initialize a compressor. -+ * @compr: compressor description object -+ * -+ * This function initializes the requested compressor and returns zero in case -+ * of success or a negative error code in case of failure. 
-+ */
-+static int __init compr_init(struct ubifs_compressor *compr)
-+{
-+	/* Only compressors with a CryptoAPI name need a transform object */
-+	if (compr->capi_name) {
-+		compr->cc = crypto_alloc_comp(compr->capi_name, 0, 0);
-+		if (IS_ERR(compr->cc)) {
-+			long code = PTR_ERR(compr->cc);
-+
-+			ubifs_err("cannot initialize compressor %s, error %ld",
-+				  compr->name, code);
-+			return code;
-+		}
-+	}
-+
-+	/* Make the compressor reachable by its type */
-+	ubifs_compressors[compr->compr_type] = compr;
-+	return 0;
-+}
-+
-+/**
-+ * compr_exit - de-initialize a compressor.
-+ * @compr: compressor description object
-+ *
-+ * Releases the transform object which 'compr_init()' allocated, if any.
-+ */
-+static void compr_exit(struct ubifs_compressor *compr)
-+{
-+	if (compr->capi_name)
-+		crypto_free_comp(compr->cc);
-+}
-+
-+/**
-+ * ubifs_compressors_init - initialize UBIFS compressors.
-+ *
-+ * This function initializes the compressors which were compiled in. Returns
-+ * zero in case of success and a negative error code in case of failure.
-+ */
-+int __init ubifs_compressors_init(void)
-+{
-+	int err = compr_init(&lzo_compr);
-+
-+	if (err)
-+		return err;
-+
-+	err = compr_init(&zlib_compr);
-+	if (err) {
-+		/* Undo the LZO initialization done above */
-+		compr_exit(&lzo_compr);
-+		return err;
-+	}
-+
-+	ubifs_compressors[UBIFS_COMPR_NONE] = &none_compr;
-+	return 0;
-+}
-+
-+/**
-+ * ubifs_compressors_exit - de-initialize UBIFS compressors.
-+ */
-+void ubifs_compressors_exit(void)
-+{
-+	compr_exit(&lzo_compr);
-+	compr_exit(&zlib_compr);
-+}
-diff -Nurd linux-2.6.24.orig/fs/ubifs/debug.c linux-2.6.24/fs/ubifs/debug.c
---- linux-2.6.24.orig/fs/ubifs/debug.c 1970-01-01 01:00:00.000000000 +0100
-+++ linux-2.6.24/fs/ubifs/debug.c 2009-04-17 09:49:28.000000000 +0200
-@@ -0,0 +1,2603 @@
-+/*
-+ * This file is part of UBIFS.
-+ *
-+ * Copyright (C) 2006-2008 Nokia Corporation
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms of the GNU General Public License version 2 as published by
-+ * the Free Software Foundation.
-+ * -+ * This program is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -+ * more details. -+ * -+ * You should have received a copy of the GNU General Public License along with -+ * this program; if not, write to the Free Software Foundation, Inc., 51 -+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ * -+ * Authors: Artem Bityutskiy (Битюцкий Артём) -+ * Adrian Hunter -+ */ -+ -+/* -+ * This file implements most of the debugging stuff which is compiled in only -+ * when it is enabled. But some debugging check functions are implemented in -+ * corresponding subsystem, just because they are closely related and utilize -+ * various local functions of those subsystems. -+ */ -+ -+#define UBIFS_DBG_PRESERVE_UBI -+ -+#include "ubifs.h" -+#include <linux/module.h> -+#include <linux/moduleparam.h> -+#include <linux/debugfs.h> -+ -+#ifdef CONFIG_UBIFS_FS_DEBUG -+ -+DEFINE_SPINLOCK(dbg_lock); -+ -+static char dbg_key_buf0[128]; -+static char dbg_key_buf1[128]; -+ -+unsigned int ubifs_msg_flags = UBIFS_MSG_FLAGS_DEFAULT; -+unsigned int ubifs_chk_flags = UBIFS_CHK_FLAGS_DEFAULT; -+unsigned int ubifs_tst_flags; -+ -+module_param_named(debug_msgs, ubifs_msg_flags, uint, S_IRUGO | S_IWUSR); -+module_param_named(debug_chks, ubifs_chk_flags, uint, S_IRUGO | S_IWUSR); -+module_param_named(debug_tsts, ubifs_tst_flags, uint, S_IRUGO | S_IWUSR); -+ -+MODULE_PARM_DESC(debug_msgs, "Debug message type flags"); -+MODULE_PARM_DESC(debug_chks, "Debug check flags"); -+MODULE_PARM_DESC(debug_tsts, "Debug special test flags"); -+ -+static const char *get_key_fmt(int fmt) -+{ -+ switch (fmt) { -+ case UBIFS_SIMPLE_KEY_FMT: -+ return "simple"; -+ default: -+ return "unknown/invalid format"; -+ } -+} -+ -+static const char *get_key_hash(int hash) -+{ -+ switch (hash) { -+ case UBIFS_KEY_HASH_R5: -+ return "R5"; -+ case 
UBIFS_KEY_HASH_TEST: -+ return "test"; -+ default: -+ return "unknown/invalid name hash"; -+ } -+} -+ -+static const char *get_key_type(int type) -+{ -+ switch (type) { -+ case UBIFS_INO_KEY: -+ return "inode"; -+ case UBIFS_DENT_KEY: -+ return "direntry"; -+ case UBIFS_XENT_KEY: -+ return "xentry"; -+ case UBIFS_DATA_KEY: -+ return "data"; -+ case UBIFS_TRUN_KEY: -+ return "truncate"; -+ default: -+ return "unknown/invalid key"; -+ } -+} -+ -+static void sprintf_key(const struct ubifs_info *c, const union ubifs_key *key, -+ char *buffer) -+{ -+ char *p = buffer; -+ int type = key_type(c, key); -+ -+ if (c->key_fmt == UBIFS_SIMPLE_KEY_FMT) { -+ switch (type) { -+ case UBIFS_INO_KEY: -+ sprintf(p, "(%lu, %s)", (unsigned long)key_inum(c, key), -+ get_key_type(type)); -+ break; -+ case UBIFS_DENT_KEY: -+ case UBIFS_XENT_KEY: -+ sprintf(p, "(%lu, %s, %#08x)", -+ (unsigned long)key_inum(c, key), -+ get_key_type(type), key_hash(c, key)); -+ break; -+ case UBIFS_DATA_KEY: -+ sprintf(p, "(%lu, %s, %u)", -+ (unsigned long)key_inum(c, key), -+ get_key_type(type), key_block(c, key)); -+ break; -+ case UBIFS_TRUN_KEY: -+ sprintf(p, "(%lu, %s)", -+ (unsigned long)key_inum(c, key), -+ get_key_type(type)); -+ break; -+ default: -+ sprintf(p, "(bad key type: %#08x, %#08x)", -+ key->u32[0], key->u32[1]); -+ } -+ } else -+ sprintf(p, "bad key format %d", c->key_fmt); -+} -+ -+const char *dbg_key_str0(const struct ubifs_info *c, const union ubifs_key *key) -+{ -+ /* dbg_lock must be held */ -+ sprintf_key(c, key, dbg_key_buf0); -+ return dbg_key_buf0; -+} -+ -+const char *dbg_key_str1(const struct ubifs_info *c, const union ubifs_key *key) -+{ -+ /* dbg_lock must be held */ -+ sprintf_key(c, key, dbg_key_buf1); -+ return dbg_key_buf1; -+} -+ -+const char *dbg_ntype(int type) -+{ -+ switch (type) { -+ case UBIFS_PAD_NODE: -+ return "padding node"; -+ case UBIFS_SB_NODE: -+ return "superblock node"; -+ case UBIFS_MST_NODE: -+ return "master node"; -+ case UBIFS_REF_NODE: -+ return 
"reference node"; -+ case UBIFS_INO_NODE: -+ return "inode node"; -+ case UBIFS_DENT_NODE: -+ return "direntry node"; -+ case UBIFS_XENT_NODE: -+ return "xentry node"; -+ case UBIFS_DATA_NODE: -+ return "data node"; -+ case UBIFS_TRUN_NODE: -+ return "truncate node"; -+ case UBIFS_IDX_NODE: -+ return "indexing node"; -+ case UBIFS_CS_NODE: -+ return "commit start node"; -+ case UBIFS_ORPH_NODE: -+ return "orphan node"; -+ default: -+ return "unknown node"; -+ } -+} -+ -+static const char *dbg_gtype(int type) -+{ -+ switch (type) { -+ case UBIFS_NO_NODE_GROUP: -+ return "no node group"; -+ case UBIFS_IN_NODE_GROUP: -+ return "in node group"; -+ case UBIFS_LAST_OF_NODE_GROUP: -+ return "last of node group"; -+ default: -+ return "unknown"; -+ } -+} -+ -+const char *dbg_cstate(int cmt_state) -+{ -+ switch (cmt_state) { -+ case COMMIT_RESTING: -+ return "commit resting"; -+ case COMMIT_BACKGROUND: -+ return "background commit requested"; -+ case COMMIT_REQUIRED: -+ return "commit required"; -+ case COMMIT_RUNNING_BACKGROUND: -+ return "BACKGROUND commit running"; -+ case COMMIT_RUNNING_REQUIRED: -+ return "commit running and required"; -+ case COMMIT_BROKEN: -+ return "broken commit"; -+ default: -+ return "unknown commit state"; -+ } -+} -+ -+static void dump_ch(const struct ubifs_ch *ch) -+{ -+ printk(KERN_DEBUG "\tmagic %#x\n", le32_to_cpu(ch->magic)); -+ printk(KERN_DEBUG "\tcrc %#x\n", le32_to_cpu(ch->crc)); -+ printk(KERN_DEBUG "\tnode_type %d (%s)\n", ch->node_type, -+ dbg_ntype(ch->node_type)); -+ printk(KERN_DEBUG "\tgroup_type %d (%s)\n", ch->group_type, -+ dbg_gtype(ch->group_type)); -+ printk(KERN_DEBUG "\tsqnum %llu\n", -+ (unsigned long long)le64_to_cpu(ch->sqnum)); -+ printk(KERN_DEBUG "\tlen %u\n", le32_to_cpu(ch->len)); -+} -+ -+void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode) -+{ -+ const struct ubifs_inode *ui = ubifs_inode(inode); -+ -+ printk(KERN_DEBUG "Dump in-memory inode:"); -+ printk(KERN_DEBUG "\tinode %lu\n", 
inode->i_ino);
-+	printk(KERN_DEBUG "\tsize %llu\n",
-+	       (unsigned long long)i_size_read(inode));
-+	printk(KERN_DEBUG "\tnlink %u\n", inode->i_nlink);
-+	printk(KERN_DEBUG "\tuid %u\n", (unsigned int)inode->i_uid);
-+	printk(KERN_DEBUG "\tgid %u\n", (unsigned int)inode->i_gid);
-+	printk(KERN_DEBUG "\tatime %u.%u\n",
-+	       (unsigned int)inode->i_atime.tv_sec,
-+	       (unsigned int)inode->i_atime.tv_nsec);
-+	printk(KERN_DEBUG "\tmtime %u.%u\n",
-+	       (unsigned int)inode->i_mtime.tv_sec,
-+	       (unsigned int)inode->i_mtime.tv_nsec);
-+	printk(KERN_DEBUG "\tctime %u.%u\n",
-+	       (unsigned int)inode->i_ctime.tv_sec,
-+	       (unsigned int)inode->i_ctime.tv_nsec);
-+	printk(KERN_DEBUG "\tcreat_sqnum %llu\n", ui->creat_sqnum);
-+	printk(KERN_DEBUG "\txattr_size %u\n", ui->xattr_size);
-+	printk(KERN_DEBUG "\txattr_cnt %u\n", ui->xattr_cnt);
-+	printk(KERN_DEBUG "\txattr_names %u\n", ui->xattr_names);
-+	printk(KERN_DEBUG "\tdirty %u\n", ui->dirty);
-+	printk(KERN_DEBUG "\txattr %u\n", ui->xattr);
-+	/*
-+	 * Print the bulk-read flag itself; the 'xattr' flag is already printed
-+	 * by the line above.
-+	 * NOTE(review): assumes 'struct ubifs_inode' has a 'bulk_read' member,
-+	 * as the label implies - confirm in ubifs.h.
-+	 */
-+	printk(KERN_DEBUG "\tbulk_read %u\n", ui->bulk_read);
-+	printk(KERN_DEBUG "\tsynced_i_size %llu\n",
-+	       (unsigned long long)ui->synced_i_size);
-+	printk(KERN_DEBUG "\tui_size %llu\n",
-+	       (unsigned long long)ui->ui_size);
-+	printk(KERN_DEBUG "\tflags %d\n", ui->flags);
-+	printk(KERN_DEBUG "\tcompr_type %d\n", ui->compr_type);
-+	printk(KERN_DEBUG "\tlast_page_read %lu\n", ui->last_page_read);
-+	printk(KERN_DEBUG "\tread_in_a_row %lu\n", ui->read_in_a_row);
-+	printk(KERN_DEBUG "\tdata_len %d\n", ui->data_len);
-+}
-+
-+void dbg_dump_node(const struct ubifs_info *c, const void *node)
-+{
-+	int i, n;
-+	union ubifs_key key;
-+	const struct ubifs_ch *ch = node;
-+
-+	if (dbg_failure_mode)
-+		return;
-+
-+	/* If the magic is incorrect, just hexdump the first bytes */
-+	if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC) {
-+		printk(KERN_DEBUG "Not a node, first %zu bytes:", UBIFS_CH_SZ);
-+		print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1,
-+			       (void *)node, UBIFS_CH_SZ, 1);
-+		return;
-+	}
-+
-+	
spin_lock(&dbg_lock); -+ dump_ch(node); -+ -+ switch (ch->node_type) { -+ case UBIFS_PAD_NODE: -+ { -+ const struct ubifs_pad_node *pad = node; -+ -+ printk(KERN_DEBUG "\tpad_len %u\n", -+ le32_to_cpu(pad->pad_len)); -+ break; -+ } -+ case UBIFS_SB_NODE: -+ { -+ const struct ubifs_sb_node *sup = node; -+ unsigned int sup_flags = le32_to_cpu(sup->flags); -+ -+ printk(KERN_DEBUG "\tkey_hash %d (%s)\n", -+ (int)sup->key_hash, get_key_hash(sup->key_hash)); -+ printk(KERN_DEBUG "\tkey_fmt %d (%s)\n", -+ (int)sup->key_fmt, get_key_fmt(sup->key_fmt)); -+ printk(KERN_DEBUG "\tflags %#x\n", sup_flags); -+ printk(KERN_DEBUG "\t big_lpt %u\n", -+ !!(sup_flags & UBIFS_FLG_BIGLPT)); -+ printk(KERN_DEBUG "\tmin_io_size %u\n", -+ le32_to_cpu(sup->min_io_size)); -+ printk(KERN_DEBUG "\tleb_size %u\n", -+ le32_to_cpu(sup->leb_size)); -+ printk(KERN_DEBUG "\tleb_cnt %u\n", -+ le32_to_cpu(sup->leb_cnt)); -+ printk(KERN_DEBUG "\tmax_leb_cnt %u\n", -+ le32_to_cpu(sup->max_leb_cnt)); -+ printk(KERN_DEBUG "\tmax_bud_bytes %llu\n", -+ (unsigned long long)le64_to_cpu(sup->max_bud_bytes)); -+ printk(KERN_DEBUG "\tlog_lebs %u\n", -+ le32_to_cpu(sup->log_lebs)); -+ printk(KERN_DEBUG "\tlpt_lebs %u\n", -+ le32_to_cpu(sup->lpt_lebs)); -+ printk(KERN_DEBUG "\torph_lebs %u\n", -+ le32_to_cpu(sup->orph_lebs)); -+ printk(KERN_DEBUG "\tjhead_cnt %u\n", -+ le32_to_cpu(sup->jhead_cnt)); -+ printk(KERN_DEBUG "\tfanout %u\n", -+ le32_to_cpu(sup->fanout)); -+ printk(KERN_DEBUG "\tlsave_cnt %u\n", -+ le32_to_cpu(sup->lsave_cnt)); -+ printk(KERN_DEBUG "\tdefault_compr %u\n", -+ (int)le16_to_cpu(sup->default_compr)); -+ printk(KERN_DEBUG "\trp_size %llu\n", -+ (unsigned long long)le64_to_cpu(sup->rp_size)); -+ printk(KERN_DEBUG "\trp_uid %u\n", -+ le32_to_cpu(sup->rp_uid)); -+ printk(KERN_DEBUG "\trp_gid %u\n", -+ le32_to_cpu(sup->rp_gid)); -+ printk(KERN_DEBUG "\tfmt_version %u\n", -+ le32_to_cpu(sup->fmt_version)); -+ printk(KERN_DEBUG "\ttime_gran %u\n", -+ le32_to_cpu(sup->time_gran)); -+ 
printk(KERN_DEBUG "\tUUID %02X%02X%02X%02X-%02X%02X" -+ "-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X\n", -+ sup->uuid[0], sup->uuid[1], sup->uuid[2], sup->uuid[3], -+ sup->uuid[4], sup->uuid[5], sup->uuid[6], sup->uuid[7], -+ sup->uuid[8], sup->uuid[9], sup->uuid[10], sup->uuid[11], -+ sup->uuid[12], sup->uuid[13], sup->uuid[14], -+ sup->uuid[15]); -+ break; -+ } -+ case UBIFS_MST_NODE: -+ { -+ const struct ubifs_mst_node *mst = node; -+ -+ printk(KERN_DEBUG "\thighest_inum %llu\n", -+ (unsigned long long)le64_to_cpu(mst->highest_inum)); -+ printk(KERN_DEBUG "\tcommit number %llu\n", -+ (unsigned long long)le64_to_cpu(mst->cmt_no)); -+ printk(KERN_DEBUG "\tflags %#x\n", -+ le32_to_cpu(mst->flags)); -+ printk(KERN_DEBUG "\tlog_lnum %u\n", -+ le32_to_cpu(mst->log_lnum)); -+ printk(KERN_DEBUG "\troot_lnum %u\n", -+ le32_to_cpu(mst->root_lnum)); -+ printk(KERN_DEBUG "\troot_offs %u\n", -+ le32_to_cpu(mst->root_offs)); -+ printk(KERN_DEBUG "\troot_len %u\n", -+ le32_to_cpu(mst->root_len)); -+ printk(KERN_DEBUG "\tgc_lnum %u\n", -+ le32_to_cpu(mst->gc_lnum)); -+ printk(KERN_DEBUG "\tihead_lnum %u\n", -+ le32_to_cpu(mst->ihead_lnum)); -+ printk(KERN_DEBUG "\tihead_offs %u\n", -+ le32_to_cpu(mst->ihead_offs)); -+ printk(KERN_DEBUG "\tindex_size %llu\n", -+ (unsigned long long)le64_to_cpu(mst->index_size)); -+ printk(KERN_DEBUG "\tlpt_lnum %u\n", -+ le32_to_cpu(mst->lpt_lnum)); -+ printk(KERN_DEBUG "\tlpt_offs %u\n", -+ le32_to_cpu(mst->lpt_offs)); -+ printk(KERN_DEBUG "\tnhead_lnum %u\n", -+ le32_to_cpu(mst->nhead_lnum)); -+ printk(KERN_DEBUG "\tnhead_offs %u\n", -+ le32_to_cpu(mst->nhead_offs)); -+ printk(KERN_DEBUG "\tltab_lnum %u\n", -+ le32_to_cpu(mst->ltab_lnum)); -+ printk(KERN_DEBUG "\tltab_offs %u\n", -+ le32_to_cpu(mst->ltab_offs)); -+ printk(KERN_DEBUG "\tlsave_lnum %u\n", -+ le32_to_cpu(mst->lsave_lnum)); -+ printk(KERN_DEBUG "\tlsave_offs %u\n", -+ le32_to_cpu(mst->lsave_offs)); -+ printk(KERN_DEBUG "\tlscan_lnum %u\n", -+ le32_to_cpu(mst->lscan_lnum)); -+ 
printk(KERN_DEBUG "\tleb_cnt %u\n", -+ le32_to_cpu(mst->leb_cnt)); -+ printk(KERN_DEBUG "\tempty_lebs %u\n", -+ le32_to_cpu(mst->empty_lebs)); -+ printk(KERN_DEBUG "\tidx_lebs %u\n", -+ le32_to_cpu(mst->idx_lebs)); -+ printk(KERN_DEBUG "\ttotal_free %llu\n", -+ (unsigned long long)le64_to_cpu(mst->total_free)); -+ printk(KERN_DEBUG "\ttotal_dirty %llu\n", -+ (unsigned long long)le64_to_cpu(mst->total_dirty)); -+ printk(KERN_DEBUG "\ttotal_used %llu\n", -+ (unsigned long long)le64_to_cpu(mst->total_used)); -+ printk(KERN_DEBUG "\ttotal_dead %llu\n", -+ (unsigned long long)le64_to_cpu(mst->total_dead)); -+ printk(KERN_DEBUG "\ttotal_dark %llu\n", -+ (unsigned long long)le64_to_cpu(mst->total_dark)); -+ break; -+ } -+ case UBIFS_REF_NODE: -+ { -+ const struct ubifs_ref_node *ref = node; -+ -+ printk(KERN_DEBUG "\tlnum %u\n", -+ le32_to_cpu(ref->lnum)); -+ printk(KERN_DEBUG "\toffs %u\n", -+ le32_to_cpu(ref->offs)); -+ printk(KERN_DEBUG "\tjhead %u\n", -+ le32_to_cpu(ref->jhead)); -+ break; -+ } -+ case UBIFS_INO_NODE: -+ { -+ const struct ubifs_ino_node *ino = node; -+ -+ key_read(c, &ino->key, &key); -+ printk(KERN_DEBUG "\tkey %s\n", DBGKEY(&key)); -+ printk(KERN_DEBUG "\tcreat_sqnum %llu\n", -+ (unsigned long long)le64_to_cpu(ino->creat_sqnum)); -+ printk(KERN_DEBUG "\tsize %llu\n", -+ (unsigned long long)le64_to_cpu(ino->size)); -+ printk(KERN_DEBUG "\tnlink %u\n", -+ le32_to_cpu(ino->nlink)); -+ printk(KERN_DEBUG "\tatime %lld.%u\n", -+ (long long)le64_to_cpu(ino->atime_sec), -+ le32_to_cpu(ino->atime_nsec)); -+ printk(KERN_DEBUG "\tmtime %lld.%u\n", -+ (long long)le64_to_cpu(ino->mtime_sec), -+ le32_to_cpu(ino->mtime_nsec)); -+ printk(KERN_DEBUG "\tctime %lld.%u\n", -+ (long long)le64_to_cpu(ino->ctime_sec), -+ le32_to_cpu(ino->ctime_nsec)); -+ printk(KERN_DEBUG "\tuid %u\n", -+ le32_to_cpu(ino->uid)); -+ printk(KERN_DEBUG "\tgid %u\n", -+ le32_to_cpu(ino->gid)); -+ printk(KERN_DEBUG "\tmode %u\n", -+ le32_to_cpu(ino->mode)); -+ printk(KERN_DEBUG "\tflags 
%#x\n", -+ le32_to_cpu(ino->flags)); -+ printk(KERN_DEBUG "\txattr_cnt %u\n", -+ le32_to_cpu(ino->xattr_cnt)); -+ printk(KERN_DEBUG "\txattr_size %u\n", -+ le32_to_cpu(ino->xattr_size)); -+ printk(KERN_DEBUG "\txattr_names %u\n", -+ le32_to_cpu(ino->xattr_names)); -+ printk(KERN_DEBUG "\tcompr_type %#x\n", -+ (int)le16_to_cpu(ino->compr_type)); -+ printk(KERN_DEBUG "\tdata len %u\n", -+ le32_to_cpu(ino->data_len)); -+ break; -+ } -+ case UBIFS_DENT_NODE: -+ case UBIFS_XENT_NODE: -+ { -+ const struct ubifs_dent_node *dent = node; -+ int nlen = le16_to_cpu(dent->nlen); -+ -+ key_read(c, &dent->key, &key); -+ printk(KERN_DEBUG "\tkey %s\n", DBGKEY(&key)); -+ printk(KERN_DEBUG "\tinum %llu\n", -+ (unsigned long long)le64_to_cpu(dent->inum)); -+ printk(KERN_DEBUG "\ttype %d\n", (int)dent->type); -+ printk(KERN_DEBUG "\tnlen %d\n", nlen); -+ printk(KERN_DEBUG "\tname "); -+ -+ if (nlen > UBIFS_MAX_NLEN) -+ printk(KERN_DEBUG "(bad name length, not printing, " -+ "bad or corrupted node)"); -+ else { -+ for (i = 0; i < nlen && dent->name[i]; i++) -+ printk("%c", dent->name[i]); -+ } -+ printk("\n"); -+ -+ break; -+ } -+ case UBIFS_DATA_NODE: -+ { -+ const struct ubifs_data_node *dn = node; -+ int dlen = le32_to_cpu(ch->len) - UBIFS_DATA_NODE_SZ; -+ -+ key_read(c, &dn->key, &key); -+ printk(KERN_DEBUG "\tkey %s\n", DBGKEY(&key)); -+ printk(KERN_DEBUG "\tsize %u\n", -+ le32_to_cpu(dn->size)); -+ printk(KERN_DEBUG "\tcompr_typ %d\n", -+ (int)le16_to_cpu(dn->compr_type)); -+ printk(KERN_DEBUG "\tdata size %d\n", -+ dlen); -+ printk(KERN_DEBUG "\tdata:\n"); -+ print_hex_dump(KERN_DEBUG, "\t", DUMP_PREFIX_OFFSET, 32, 1, -+ (void *)&dn->data, dlen, 0); -+ break; -+ } -+ case UBIFS_TRUN_NODE: -+ { -+ const struct ubifs_trun_node *trun = node; -+ -+ printk(KERN_DEBUG "\tinum %u\n", -+ le32_to_cpu(trun->inum)); -+ printk(KERN_DEBUG "\told_size %llu\n", -+ (unsigned long long)le64_to_cpu(trun->old_size)); -+ printk(KERN_DEBUG "\tnew_size %llu\n", -+ (unsigned long 
long)le64_to_cpu(trun->new_size)); -+ break; -+ } -+ case UBIFS_IDX_NODE: -+ { -+ const struct ubifs_idx_node *idx = node; -+ -+ n = le16_to_cpu(idx->child_cnt); -+ printk(KERN_DEBUG "\tchild_cnt %d\n", n); -+ printk(KERN_DEBUG "\tlevel %d\n", -+ (int)le16_to_cpu(idx->level)); -+ printk(KERN_DEBUG "\tBranches:\n"); -+ -+ for (i = 0; i < n && i < c->fanout - 1; i++) { -+ const struct ubifs_branch *br; -+ -+ br = ubifs_idx_branch(c, idx, i); -+ key_read(c, &br->key, &key); -+ printk(KERN_DEBUG "\t%d: LEB %d:%d len %d key %s\n", -+ i, le32_to_cpu(br->lnum), le32_to_cpu(br->offs), -+ le32_to_cpu(br->len), DBGKEY(&key)); -+ } -+ break; -+ } -+ case UBIFS_CS_NODE: -+ break; -+ case UBIFS_ORPH_NODE: -+ { -+ const struct ubifs_orph_node *orph = node; -+ -+ printk(KERN_DEBUG "\tcommit number %llu\n", -+ (unsigned long long) -+ le64_to_cpu(orph->cmt_no) & LLONG_MAX); -+ printk(KERN_DEBUG "\tlast node flag %llu\n", -+ (unsigned long long)(le64_to_cpu(orph->cmt_no)) >> 63); -+ n = (le32_to_cpu(ch->len) - UBIFS_ORPH_NODE_SZ) >> 3; -+ printk(KERN_DEBUG "\t%d orphan inode numbers:\n", n); -+ for (i = 0; i < n; i++) -+ printk(KERN_DEBUG "\t ino %llu\n", -+ (unsigned long long)le64_to_cpu(orph->inos[i])); -+ break; -+ } -+ default: -+ printk(KERN_DEBUG "node type %d was not recognized\n", -+ (int)ch->node_type); -+ } -+ spin_unlock(&dbg_lock); -+} -+ -+void dbg_dump_budget_req(const struct ubifs_budget_req *req) -+{ -+ spin_lock(&dbg_lock); -+ printk(KERN_DEBUG "Budgeting request: new_ino %d, dirtied_ino %d\n", -+ req->new_ino, req->dirtied_ino); -+ printk(KERN_DEBUG "\tnew_ino_d %d, dirtied_ino_d %d\n", -+ req->new_ino_d, req->dirtied_ino_d); -+ printk(KERN_DEBUG "\tnew_page %d, dirtied_page %d\n", -+ req->new_page, req->dirtied_page); -+ printk(KERN_DEBUG "\tnew_dent %d, mod_dent %d\n", -+ req->new_dent, req->mod_dent); -+ printk(KERN_DEBUG "\tidx_growth %d\n", req->idx_growth); -+ printk(KERN_DEBUG "\tdata_growth %d dd_growth %d\n", -+ req->data_growth, req->dd_growth); -+ 
spin_unlock(&dbg_lock); -+} -+ -+void dbg_dump_lstats(const struct ubifs_lp_stats *lst) -+{ -+ spin_lock(&dbg_lock); -+ printk(KERN_DEBUG "(pid %d) Lprops statistics: empty_lebs %d, " -+ "idx_lebs %d\n", current->pid, lst->empty_lebs, lst->idx_lebs); -+ printk(KERN_DEBUG "\ttaken_empty_lebs %d, total_free %lld, " -+ "total_dirty %lld\n", lst->taken_empty_lebs, lst->total_free, -+ lst->total_dirty); -+ printk(KERN_DEBUG "\ttotal_used %lld, total_dark %lld, " -+ "total_dead %lld\n", lst->total_used, lst->total_dark, -+ lst->total_dead); -+ spin_unlock(&dbg_lock); -+} -+ -+void dbg_dump_budg(struct ubifs_info *c) -+{ -+ int i; -+ struct rb_node *rb; -+ struct ubifs_bud *bud; -+ struct ubifs_gced_idx_leb *idx_gc; -+ long long available, outstanding, free; -+ -+ ubifs_assert(spin_is_locked(&c->space_lock)); -+ spin_lock(&dbg_lock); -+ printk(KERN_DEBUG "(pid %d) Budgeting info: budg_data_growth %lld, " -+ "budg_dd_growth %lld, budg_idx_growth %lld\n", current->pid, -+ c->budg_data_growth, c->budg_dd_growth, c->budg_idx_growth); -+ printk(KERN_DEBUG "\tdata budget sum %lld, total budget sum %lld, " -+ "freeable_cnt %d\n", c->budg_data_growth + c->budg_dd_growth, -+ c->budg_data_growth + c->budg_dd_growth + c->budg_idx_growth, -+ c->freeable_cnt); -+ printk(KERN_DEBUG "\tmin_idx_lebs %d, old_idx_sz %lld, " -+ "calc_idx_sz %lld, idx_gc_cnt %d\n", c->min_idx_lebs, -+ c->old_idx_sz, c->calc_idx_sz, c->idx_gc_cnt); -+ printk(KERN_DEBUG "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, " -+ "clean_zn_cnt %ld\n", atomic_long_read(&c->dirty_pg_cnt), -+ atomic_long_read(&c->dirty_zn_cnt), -+ atomic_long_read(&c->clean_zn_cnt)); -+ printk(KERN_DEBUG "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n", -+ c->dark_wm, c->dead_wm, c->max_idx_node_sz); -+ printk(KERN_DEBUG "\tgc_lnum %d, ihead_lnum %d\n", -+ c->gc_lnum, c->ihead_lnum); -+ /* If we are in R/O mode, journal heads do not exist */ -+ if (c->jheads) -+ for (i = 0; i < c->jhead_cnt; i++) -+ printk(KERN_DEBUG "\tjhead %d\t LEB %d\n", -+ 
c->jheads[i].wbuf.jhead, c->jheads[i].wbuf.lnum);
-+	for (rb = rb_first(&c->buds); rb; rb = rb_next(rb)) {
-+		bud = rb_entry(rb, struct ubifs_bud, rb);
-+		printk(KERN_DEBUG "\tbud LEB %d\n", bud->lnum);
-+	}
-+	list_for_each_entry(bud, &c->old_buds, list)
-+		printk(KERN_DEBUG "\told bud LEB %d\n", bud->lnum);
-+	list_for_each_entry(idx_gc, &c->idx_gc, list)
-+		printk(KERN_DEBUG "\tGC'ed idx LEB %d unmap %d\n",
-+		       idx_gc->lnum, idx_gc->unmap);
-+	printk(KERN_DEBUG "\tcommit state %d\n", c->cmt_state);
-+
-+	/* Print budgeting predictions */
-+	available = ubifs_calc_available(c, c->min_idx_lebs);
-+	outstanding = c->budg_data_growth + c->budg_dd_growth;
-+	free = ubifs_get_free_space_nolock(c);
-+	printk(KERN_DEBUG "Budgeting predictions:\n");
-+	printk(KERN_DEBUG "\tavailable: %lld, outstanding %lld, free %lld\n",
-+	       available, outstanding, free);
-+	spin_unlock(&dbg_lock);
-+}
-+
-+/**
-+ * dbg_dump_lprop - dump properties of one LEB.
-+ * @c: UBIFS file-system description object
-+ * @lp: LEB properties to dump
-+ *
-+ * The "used" figure is derived as LEB size minus free and dirty space.
-+ */
-+void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp)
-+{
-+	printk(KERN_DEBUG "LEB %d lprops: free %d, dirty %d (used %d), "
-+	       "flags %#x\n", lp->lnum, lp->free, lp->dirty,
-+	       c->leb_size - lp->free - lp->dirty, lp->flags);
-+}
-+
-+/**
-+ * dbg_dump_lprops - dump lprops statistics and properties of all main LEBs.
-+ * @c: UBIFS file-system description object
-+ *
-+ * LEBs whose properties cannot be read are reported and skipped.
-+ */
-+void dbg_dump_lprops(struct ubifs_info *c)
-+{
-+	int lnum, err;
-+	struct ubifs_lprops lp;
-+	struct ubifs_lp_stats lst;
-+
-+	printk(KERN_DEBUG "(pid %d) start dumping LEB properties\n",
-+	       current->pid);
-+	ubifs_get_lp_stats(c, &lst);
-+	dbg_dump_lstats(&lst);
-+
-+	for (lnum = c->main_first; lnum < c->leb_cnt; lnum++) {
-+		err = ubifs_read_one_lp(c, lnum, &lp);
-+		if (err) {
-+			ubifs_err("cannot read lprops for LEB %d", lnum);
-+			/* 'lp' was not filled in, do not dump garbage */
-+			continue;
-+		}
-+
-+		dbg_dump_lprop(c, &lp);
-+	}
-+	printk(KERN_DEBUG "(pid %d) finish dumping LEB properties\n",
-+	       current->pid);
-+}
-+
-+void dbg_dump_lpt_info(struct ubifs_info *c)
-+{
-+	int i;
-+
-+	spin_lock(&dbg_lock);
-+	printk(KERN_DEBUG "(pid %d) dumping LPT information\n", current->pid);
-+	printk(KERN_DEBUG "\tlpt_sz: %lld\n", c->lpt_sz);
-+	printk(KERN_DEBUG "\tpnode_sz: %d\n", c->pnode_sz);
-+	
printk(KERN_DEBUG "\tnnode_sz: %d\n", c->nnode_sz); -+ printk(KERN_DEBUG "\tltab_sz: %d\n", c->ltab_sz); -+ printk(KERN_DEBUG "\tlsave_sz: %d\n", c->lsave_sz); -+ printk(KERN_DEBUG "\tbig_lpt: %d\n", c->big_lpt); -+ printk(KERN_DEBUG "\tlpt_hght: %d\n", c->lpt_hght); -+ printk(KERN_DEBUG "\tpnode_cnt: %d\n", c->pnode_cnt); -+ printk(KERN_DEBUG "\tnnode_cnt: %d\n", c->nnode_cnt); -+ printk(KERN_DEBUG "\tdirty_pn_cnt: %d\n", c->dirty_pn_cnt); -+ printk(KERN_DEBUG "\tdirty_nn_cnt: %d\n", c->dirty_nn_cnt); -+ printk(KERN_DEBUG "\tlsave_cnt: %d\n", c->lsave_cnt); -+ printk(KERN_DEBUG "\tspace_bits: %d\n", c->space_bits); -+ printk(KERN_DEBUG "\tlpt_lnum_bits: %d\n", c->lpt_lnum_bits); -+ printk(KERN_DEBUG "\tlpt_offs_bits: %d\n", c->lpt_offs_bits); -+ printk(KERN_DEBUG "\tlpt_spc_bits: %d\n", c->lpt_spc_bits); -+ printk(KERN_DEBUG "\tpcnt_bits: %d\n", c->pcnt_bits); -+ printk(KERN_DEBUG "\tlnum_bits: %d\n", c->lnum_bits); -+ printk(KERN_DEBUG "\tLPT root is at %d:%d\n", c->lpt_lnum, c->lpt_offs); -+ printk(KERN_DEBUG "\tLPT head is at %d:%d\n", -+ c->nhead_lnum, c->nhead_offs); -+ printk(KERN_DEBUG "\tLPT ltab is at %d:%d\n", -+ c->ltab_lnum, c->ltab_offs); -+ if (c->big_lpt) -+ printk(KERN_DEBUG "\tLPT lsave is at %d:%d\n", -+ c->lsave_lnum, c->lsave_offs); -+ for (i = 0; i < c->lpt_lebs; i++) -+ printk(KERN_DEBUG "\tLPT LEB %d free %d dirty %d tgc %d " -+ "cmt %d\n", i + c->lpt_first, c->ltab[i].free, -+ c->ltab[i].dirty, c->ltab[i].tgc, c->ltab[i].cmt); -+ spin_unlock(&dbg_lock); -+} -+ -+void dbg_dump_leb(const struct ubifs_info *c, int lnum) -+{ -+ struct ubifs_scan_leb *sleb; -+ struct ubifs_scan_node *snod; -+ -+ if (dbg_failure_mode) -+ return; -+ -+ printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", -+ current->pid, lnum); -+ sleb = ubifs_scan(c, lnum, 0, c->dbg->buf); -+ if (IS_ERR(sleb)) { -+ ubifs_err("scan error %d", (int)PTR_ERR(sleb)); -+ return; -+ } -+ -+ printk(KERN_DEBUG "LEB %d has %d nodes ending at %d\n", lnum, -+ sleb->nodes_cnt, sleb->endpt); 
-+ -+ list_for_each_entry(snod, &sleb->nodes, list) { -+ cond_resched(); -+ printk(KERN_DEBUG "Dumping node at LEB %d:%d len %d\n", lnum, -+ snod->offs, snod->len); -+ dbg_dump_node(c, snod->node); -+ } -+ -+ printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n", -+ current->pid, lnum); -+ ubifs_scan_destroy(sleb); -+ return; -+} -+ -+void dbg_dump_znode(const struct ubifs_info *c, -+ const struct ubifs_znode *znode) -+{ -+ int n; -+ const struct ubifs_zbranch *zbr; -+ -+ spin_lock(&dbg_lock); -+ if (znode->parent) -+ zbr = &znode->parent->zbranch[znode->iip]; -+ else -+ zbr = &c->zroot; -+ -+ printk(KERN_DEBUG "znode %p, LEB %d:%d len %d parent %p iip %d level %d" -+ " child_cnt %d flags %lx\n", znode, zbr->lnum, zbr->offs, -+ zbr->len, znode->parent, znode->iip, znode->level, -+ znode->child_cnt, znode->flags); -+ -+ if (znode->child_cnt <= 0 || znode->child_cnt > c->fanout) { -+ spin_unlock(&dbg_lock); -+ return; -+ } -+ -+ printk(KERN_DEBUG "zbranches:\n"); -+ for (n = 0; n < znode->child_cnt; n++) { -+ zbr = &znode->zbranch[n]; -+ if (znode->level > 0) -+ printk(KERN_DEBUG "\t%d: znode %p LEB %d:%d len %d key " -+ "%s\n", n, zbr->znode, zbr->lnum, -+ zbr->offs, zbr->len, -+ DBGKEY(&zbr->key)); -+ else -+ printk(KERN_DEBUG "\t%d: LNC %p LEB %d:%d len %d key " -+ "%s\n", n, zbr->znode, zbr->lnum, -+ zbr->offs, zbr->len, -+ DBGKEY(&zbr->key)); -+ } -+ spin_unlock(&dbg_lock); -+} -+ -+void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat) -+{ -+ int i; -+ -+ printk(KERN_DEBUG "(pid %d) start dumping heap cat %d (%d elements)\n", -+ current->pid, cat, heap->cnt); -+ for (i = 0; i < heap->cnt; i++) { -+ struct ubifs_lprops *lprops = heap->arr[i]; -+ -+ printk(KERN_DEBUG "\t%d. 
LEB %d hpos %d free %d dirty %d "
-+		       "flags %d\n", i, lprops->lnum, lprops->hpos,
-+		       lprops->free, lprops->dirty, lprops->flags);
-+	}
-+	printk(KERN_DEBUG "(pid %d) finish dumping heap\n", current->pid);
-+}
-+
-+/**
-+ * dbg_dump_pnode - dump an LPT pnode.
-+ * @c: UBIFS file-system description object
-+ * @pnode: pnode to dump
-+ * @parent: parent nnode (only its address is printed)
-+ * @iip: index in parent
-+ */
-+void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
-+		    struct ubifs_nnode *parent, int iip)
-+{
-+	int i;
-+
-+	printk(KERN_DEBUG "(pid %d) dumping pnode:\n", current->pid);
-+	printk(KERN_DEBUG "\taddress %zx parent %zx cnext %zx\n",
-+	       (size_t)pnode, (size_t)parent, (size_t)pnode->cnext);
-+	printk(KERN_DEBUG "\tflags %lu iip %d level %d num %d\n",
-+	       pnode->flags, iip, pnode->level, pnode->num);
-+	for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
-+		struct ubifs_lprops *lp = &pnode->lprops[i];
-+
-+		printk(KERN_DEBUG "\t%d: free %d dirty %d flags %d lnum %d\n",
-+		       i, lp->free, lp->dirty, lp->flags, lp->lnum);
-+	}
-+}
-+
-+/**
-+ * dbg_dump_tnc - dump the in-memory TNC tree level by level.
-+ * @c: UBIFS file-system description object
-+ */
-+void dbg_dump_tnc(struct ubifs_info *c)
-+{
-+	struct ubifs_znode *znode;
-+	int level;
-+
-+	printk(KERN_DEBUG "\n");
-+	printk(KERN_DEBUG "(pid %d) start dumping TNC tree\n", current->pid);
-+	znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL);
-+	/*
-+	 * The iterator returns NULL when there is nothing (more) to walk, so
-+	 * the very first result has to be checked before it is dereferenced.
-+	 */
-+	if (!znode)
-+		goto out;
-+
-+	level = znode->level;
-+	printk(KERN_DEBUG "== Level %d ==\n", level);
-+	while (znode) {
-+		if (level != znode->level) {
-+			level = znode->level;
-+			printk(KERN_DEBUG "== Level %d ==\n", level);
-+		}
-+		dbg_dump_znode(c, znode);
-+		znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode);
-+	}
-+out:
-+	printk(KERN_DEBUG "(pid %d) finish dumping TNC tree\n", current->pid);
-+}
-+
-+/* 'dbg_walk_index()' callback which dumps every znode it is given */
-+static int dump_znode(struct ubifs_info *c, struct ubifs_znode *znode,
-+		      void *priv)
-+{
-+	dbg_dump_znode(c, znode);
-+	return 0;
-+}
-+
-+/**
-+ * dbg_dump_index - dump the on-flash index.
-+ * @c: UBIFS file-system description object
-+ *
-+ * This function dumps whole UBIFS indexing B-tree, unlike 'dbg_dump_tnc()'
-+ * which dumps only in-memory znodes and does not read znodes from flash.
-+ */
-+void dbg_dump_index(struct ubifs_info *c)
-+{
-+	dbg_walk_index(c, NULL, dump_znode, NULL);
-+}
-+
-+/**
-+ * dbg_save_space_info - save information about flash space.
-+ * @c: UBIFS file-system description object
-+ *
-+ * This function saves information about UBIFS free space, dirty space, etc, in
-+ * order to check it later.
-+ */
-+void dbg_save_space_info(struct ubifs_info *c)
-+{
-+	struct ubifs_debug_info *d = c->dbg;
-+
-+	ubifs_get_lp_stats(c, &d->saved_lst);
-+
-+	spin_lock(&c->space_lock);
-+	d->saved_free = ubifs_get_free_space_nolock(c);
-+	spin_unlock(&c->space_lock);
-+}
-+
-+/**
-+ * dbg_check_space_info - check flash space information.
-+ * @c: UBIFS file-system description object
-+ *
-+ * This function compares current flash space information with the information
-+ * which was saved when the 'dbg_save_space_info()' function was called.
-+ * Returns zero if the information has not changed, and %-EINVAL if it has
-+ * changed. In the latter case the saved and the current lprops statistics and
-+ * the budgeting information are dumped.
-+ */
-+int dbg_check_space_info(struct ubifs_info *c)
-+{
-+	struct ubifs_debug_info *d = c->dbg;
-+	struct ubifs_lp_stats lst;
-+	long long avail, free;
-+
-+	/* NOTE(review): 'avail' is calculated but never compared - confirm */
-+	spin_lock(&c->space_lock);
-+	avail = ubifs_calc_available(c, c->min_idx_lebs);
-+	spin_unlock(&c->space_lock);
-+	free = ubifs_get_free_space(c);
-+
-+	if (free != d->saved_free) {
-+		ubifs_err("free space changed from %lld to %lld",
-+			  d->saved_free, free);
-+		goto out;
-+	}
-+
-+	return 0;
-+
-+out:
-+	ubifs_msg("saved lprops statistics dump");
-+	dbg_dump_lstats(&d->saved_lst);
-+	ubifs_get_lp_stats(c, &lst);
-+	ubifs_msg("current lprops statistics dump");
-+	/* Dump the statistics which were just read, not the saved ones again */
-+	dbg_dump_lstats(&lst);
-+	/* 'dbg_dump_budg()' asserts that @c->space_lock is held */
-+	spin_lock(&c->space_lock);
-+	dbg_dump_budg(c);
-+	spin_unlock(&c->space_lock);
-+	dump_stack();
-+	return -EINVAL;
-+}
-+
-+/**
-+ * dbg_check_synced_i_size - check synchronized inode size.
-+ * @inode: inode to check
-+ *
-+ * If inode is clean, synchronized inode size has to be equivalent to current
-+ * inode size.
This function has to be called only for locked inodes (@i_mutex
-+ * has to be locked). Returns %0 if synchronized inode size is correct, and
-+ * %-EINVAL if not.
-+ */
-+int dbg_check_synced_i_size(struct inode *inode)
-+{
-+	struct ubifs_inode *ui = ubifs_inode(inode);
-+	int ret = 0;
-+
-+	/* Only regular files are checked, and only if general checks are on */
-+	if (!(ubifs_chk_flags & UBIFS_CHK_GEN) || !S_ISREG(inode->i_mode))
-+		return 0;
-+
-+	mutex_lock(&ui->ui_mutex);
-+	spin_lock(&ui->ui_lock);
-+	/* A clean inode must not have a size which differs from the synced one */
-+	if (!ui->dirty && ui->ui_size != ui->synced_i_size) {
-+		ubifs_err("ui_size is %lld, synced_i_size is %lld, but inode "
-+			  "is clean", ui->ui_size, ui->synced_i_size);
-+		ubifs_err("i_ino %lu, i_mode %#x, i_size %lld", inode->i_ino,
-+			  inode->i_mode, i_size_read(inode));
-+		dbg_dump_stack();
-+		ret = -EINVAL;
-+	}
-+	spin_unlock(&ui->ui_lock);
-+	mutex_unlock(&ui->ui_mutex);
-+
-+	return ret;
-+}
-+
-+/*
-+ * dbg_check_dir_size - check directory inode size and link count.
-+ * @c: UBIFS file-system description object
-+ * @dir: the directory to check
-+ *
-+ * This function makes sure that directory size and link count are correct.
-+ * Returns zero in case of success and a negative error code in case of
-+ * failure.
-+ *
-+ * Note, it is a good idea to make sure the @dir->i_mutex is locked before
-+ * calling this function.
-+ */ -+int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir) -+{ -+ unsigned int nlink = 2; -+ union ubifs_key key; -+ struct ubifs_dent_node *dent, *pdent = NULL; -+ struct qstr nm = { .name = NULL }; -+ loff_t size = UBIFS_INO_NODE_SZ; -+ -+ if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) -+ return 0; -+ -+ if (!S_ISDIR(dir->i_mode)) -+ return 0; -+ -+ lowest_dent_key(c, &key, dir->i_ino); -+ while (1) { -+ int err; -+ -+ dent = ubifs_tnc_next_ent(c, &key, &nm); -+ if (IS_ERR(dent)) { -+ err = PTR_ERR(dent); -+ if (err == -ENOENT) -+ break; -+ return err; -+ } -+ -+ nm.name = dent->name; -+ nm.len = le16_to_cpu(dent->nlen); -+ size += CALC_DENT_SIZE(nm.len); -+ if (dent->type == UBIFS_ITYPE_DIR) -+ nlink += 1; -+ kfree(pdent); -+ pdent = dent; -+ key_read(c, &dent->key, &key); -+ } -+ kfree(pdent); -+ -+ if (i_size_read(dir) != size) { -+ ubifs_err("directory inode %lu has size %llu, " -+ "but calculated size is %llu", dir->i_ino, -+ (unsigned long long)i_size_read(dir), -+ (unsigned long long)size); -+ dump_stack(); -+ return -EINVAL; -+ } -+ if (dir->i_nlink != nlink) { -+ ubifs_err("directory inode %lu has nlink %u, but calculated " -+ "nlink is %u", dir->i_ino, dir->i_nlink, nlink); -+ dump_stack(); -+ return -EINVAL; -+ } -+ -+ return 0; -+} -+ -+/** -+ * dbg_check_key_order - make sure that colliding keys are properly ordered. -+ * @c: UBIFS file-system description object -+ * @zbr1: first zbranch -+ * @zbr2: following zbranch -+ * -+ * In UBIFS indexing B-tree colliding keys has to be sorted in binary order of -+ * names of the direntries/xentries which are referred by the keys. This -+ * function reads direntries/xentries referred by @zbr1 and @zbr2 and makes -+ * sure the name of direntry/xentry referred by @zbr1 is less than -+ * direntry/xentry referred by @zbr2. Returns zero if this is true, %1 if not, -+ * and a negative error code in case of failure. 
-+ */ -+static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1, -+ struct ubifs_zbranch *zbr2) -+{ -+ int err, nlen1, nlen2, cmp; -+ struct ubifs_dent_node *dent1, *dent2; -+ union ubifs_key key; -+ -+ ubifs_assert(!keys_cmp(c, &zbr1->key, &zbr2->key)); -+ dent1 = kmalloc(UBIFS_MAX_DENT_NODE_SZ, GFP_NOFS); -+ if (!dent1) -+ return -ENOMEM; -+ dent2 = kmalloc(UBIFS_MAX_DENT_NODE_SZ, GFP_NOFS); -+ if (!dent2) { -+ err = -ENOMEM; -+ goto out_free; -+ } -+ -+ err = ubifs_tnc_read_node(c, zbr1, dent1); -+ if (err) -+ goto out_free; -+ err = ubifs_validate_entry(c, dent1); -+ if (err) -+ goto out_free; -+ -+ err = ubifs_tnc_read_node(c, zbr2, dent2); -+ if (err) -+ goto out_free; -+ err = ubifs_validate_entry(c, dent2); -+ if (err) -+ goto out_free; -+ -+ /* Make sure node keys are the same as in zbranch */ -+ err = 1; -+ key_read(c, &dent1->key, &key); -+ if (keys_cmp(c, &zbr1->key, &key)) { -+ dbg_err("1st entry at %d:%d has key %s", zbr1->lnum, -+ zbr1->offs, DBGKEY(&key)); -+ dbg_err("but it should have key %s according to tnc", -+ DBGKEY(&zbr1->key)); -+ dbg_dump_node(c, dent1); -+ goto out_free; -+ } -+ -+ key_read(c, &dent2->key, &key); -+ if (keys_cmp(c, &zbr2->key, &key)) { -+ dbg_err("2nd entry at %d:%d has key %s", zbr1->lnum, -+ zbr1->offs, DBGKEY(&key)); -+ dbg_err("but it should have key %s according to tnc", -+ DBGKEY(&zbr2->key)); -+ dbg_dump_node(c, dent2); -+ goto out_free; -+ } -+ -+ nlen1 = le16_to_cpu(dent1->nlen); -+ nlen2 = le16_to_cpu(dent2->nlen); -+ -+ cmp = memcmp(dent1->name, dent2->name, min_t(int, nlen1, nlen2)); -+ if (cmp < 0 || (cmp == 0 && nlen1 < nlen2)) { -+ err = 0; -+ goto out_free; -+ } -+ if (cmp == 0 && nlen1 == nlen2) -+ dbg_err("2 xent/dent nodes with the same name"); -+ else -+ dbg_err("bad order of colliding key %s", -+ DBGKEY(&key)); -+ -+ ubifs_msg("first node at %d:%d\n", zbr1->lnum, zbr1->offs); -+ dbg_dump_node(c, dent1); -+ ubifs_msg("second node at %d:%d\n", zbr2->lnum, zbr2->offs); -+ 
dbg_dump_node(c, dent2); -+ -+out_free: -+ kfree(dent2); -+ kfree(dent1); -+ return err; -+} -+ -+/** -+ * dbg_check_znode - check if znode is all right. -+ * @c: UBIFS file-system description object -+ * @zbr: zbranch which points to this znode -+ * -+ * This function makes sure that znode referred to by @zbr is all right. -+ * Returns zero if it is, and %-EINVAL if it is not. -+ */ -+static int dbg_check_znode(struct ubifs_info *c, struct ubifs_zbranch *zbr) -+{ -+ struct ubifs_znode *znode = zbr->znode; -+ struct ubifs_znode *zp = znode->parent; -+ int n, err, cmp; -+ -+ if (znode->child_cnt <= 0 || znode->child_cnt > c->fanout) { -+ err = 1; -+ goto out; -+ } -+ if (znode->level < 0) { -+ err = 2; -+ goto out; -+ } -+ if (znode->iip < 0 || znode->iip >= c->fanout) { -+ err = 3; -+ goto out; -+ } -+ -+ if (zbr->len == 0) -+ /* Only dirty zbranch may have no on-flash nodes */ -+ if (!ubifs_zn_dirty(znode)) { -+ err = 4; -+ goto out; -+ } -+ -+ if (ubifs_zn_dirty(znode)) { -+ /* -+ * If znode is dirty, its parent has to be dirty as well. The -+ * order of the operation is important, so we have to have -+ * memory barriers. -+ */ -+ smp_mb(); -+ if (zp && !ubifs_zn_dirty(zp)) { -+ /* -+ * The dirty flag is atomic and is cleared outside the -+ * TNC mutex, so znode's dirty flag may now have -+ * been cleared. The child is always cleared before the -+ * parent, so we just need to check again. 
-+ */ -+ smp_mb(); -+ if (ubifs_zn_dirty(znode)) { -+ err = 5; -+ goto out; -+ } -+ } -+ } -+ -+ if (zp) { -+ const union ubifs_key *min, *max; -+ -+ if (znode->level != zp->level - 1) { -+ err = 6; -+ goto out; -+ } -+ -+ /* Make sure the 'parent' pointer in our znode is correct */ -+ err = ubifs_search_zbranch(c, zp, &zbr->key, &n); -+ if (!err) { -+ /* This zbranch does not exist in the parent */ -+ err = 7; -+ goto out; -+ } -+ -+ if (znode->iip >= zp->child_cnt) { -+ err = 8; -+ goto out; -+ } -+ -+ if (znode->iip != n) { -+ /* This may happen only in case of collisions */ -+ if (keys_cmp(c, &zp->zbranch[n].key, -+ &zp->zbranch[znode->iip].key)) { -+ err = 9; -+ goto out; -+ } -+ n = znode->iip; -+ } -+ -+ /* -+ * Make sure that the first key in our znode is greater than or -+ * equal to the key in the pointing zbranch. -+ */ -+ min = &zbr->key; -+ cmp = keys_cmp(c, min, &znode->zbranch[0].key); -+ if (cmp == 1) { -+ err = 10; -+ goto out; -+ } -+ -+ if (n + 1 < zp->child_cnt) { -+ max = &zp->zbranch[n + 1].key; -+ -+ /* -+ * Make sure the last key in our znode is less or -+ * equivalent than the the key in zbranch which goes -+ * after our pointing zbranch. -+ */ -+ cmp = keys_cmp(c, max, -+ &znode->zbranch[znode->child_cnt - 1].key); -+ if (cmp == -1) { -+ err = 11; -+ goto out; -+ } -+ } -+ } else { -+ /* This may only be root znode */ -+ if (zbr != &c->zroot) { -+ err = 12; -+ goto out; -+ } -+ } -+ -+ /* -+ * Make sure that next key is greater or equivalent then the previous -+ * one. 
-+ */ -+ for (n = 1; n < znode->child_cnt; n++) { -+ cmp = keys_cmp(c, &znode->zbranch[n - 1].key, -+ &znode->zbranch[n].key); -+ if (cmp > 0) { -+ err = 13; -+ goto out; -+ } -+ if (cmp == 0) { -+ /* This can only be keys with colliding hash */ -+ if (!is_hash_key(c, &znode->zbranch[n].key)) { -+ err = 14; -+ goto out; -+ } -+ -+ if (znode->level != 0 || c->replaying) -+ continue; -+ -+ /* -+ * Colliding keys should follow binary order of -+ * corresponding xentry/dentry names. -+ */ -+ err = dbg_check_key_order(c, &znode->zbranch[n - 1], -+ &znode->zbranch[n]); -+ if (err < 0) -+ return err; -+ if (err) { -+ err = 15; -+ goto out; -+ } -+ } -+ } -+ -+ for (n = 0; n < znode->child_cnt; n++) { -+ if (!znode->zbranch[n].znode && -+ (znode->zbranch[n].lnum == 0 || -+ znode->zbranch[n].len == 0)) { -+ err = 16; -+ goto out; -+ } -+ -+ if (znode->zbranch[n].lnum != 0 && -+ znode->zbranch[n].len == 0) { -+ err = 17; -+ goto out; -+ } -+ -+ if (znode->zbranch[n].lnum == 0 && -+ znode->zbranch[n].len != 0) { -+ err = 18; -+ goto out; -+ } -+ -+ if (znode->zbranch[n].lnum == 0 && -+ znode->zbranch[n].offs != 0) { -+ err = 19; -+ goto out; -+ } -+ -+ if (znode->level != 0 && znode->zbranch[n].znode) -+ if (znode->zbranch[n].znode->parent != znode) { -+ err = 20; -+ goto out; -+ } -+ } -+ -+ return 0; -+ -+out: -+ ubifs_err("failed, error %d", err); -+ ubifs_msg("dump of the znode"); -+ dbg_dump_znode(c, znode); -+ if (zp) { -+ ubifs_msg("dump of the parent znode"); -+ dbg_dump_znode(c, zp); -+ } -+ dump_stack(); -+ return -EINVAL; -+} -+ -+/** -+ * dbg_check_tnc - check TNC tree. -+ * @c: UBIFS file-system description object -+ * @extra: do extra checks that are possible at start commit -+ * -+ * This function traverses whole TNC tree and checks every znode. Returns zero -+ * if everything is all right and %-EINVAL if something is wrong with TNC. 
-+ */ -+int dbg_check_tnc(struct ubifs_info *c, int extra) -+{ -+ struct ubifs_znode *znode; -+ long clean_cnt = 0, dirty_cnt = 0; -+ int err, last; -+ -+ if (!(ubifs_chk_flags & UBIFS_CHK_TNC)) -+ return 0; -+ -+ ubifs_assert(mutex_is_locked(&c->tnc_mutex)); -+ if (!c->zroot.znode) -+ return 0; -+ -+ znode = ubifs_tnc_postorder_first(c->zroot.znode); -+ while (1) { -+ struct ubifs_znode *prev; -+ struct ubifs_zbranch *zbr; -+ -+ if (!znode->parent) -+ zbr = &c->zroot; -+ else -+ zbr = &znode->parent->zbranch[znode->iip]; -+ -+ err = dbg_check_znode(c, zbr); -+ if (err) -+ return err; -+ -+ if (extra) { -+ if (ubifs_zn_dirty(znode)) -+ dirty_cnt += 1; -+ else -+ clean_cnt += 1; -+ } -+ -+ prev = znode; -+ znode = ubifs_tnc_postorder_next(znode); -+ if (!znode) -+ break; -+ -+ /* -+ * If the last key of this znode is equivalent to the first key -+ * of the next znode (collision), then check order of the keys. -+ */ -+ last = prev->child_cnt - 1; -+ if (prev->level == 0 && znode->level == 0 && !c->replaying && -+ !keys_cmp(c, &prev->zbranch[last].key, -+ &znode->zbranch[0].key)) { -+ err = dbg_check_key_order(c, &prev->zbranch[last], -+ &znode->zbranch[0]); -+ if (err < 0) -+ return err; -+ if (err) { -+ ubifs_msg("first znode"); -+ dbg_dump_znode(c, prev); -+ ubifs_msg("second znode"); -+ dbg_dump_znode(c, znode); -+ return -EINVAL; -+ } -+ } -+ } -+ -+ if (extra) { -+ if (clean_cnt != atomic_long_read(&c->clean_zn_cnt)) { -+ ubifs_err("incorrect clean_zn_cnt %ld, calculated %ld", -+ atomic_long_read(&c->clean_zn_cnt), -+ clean_cnt); -+ return -EINVAL; -+ } -+ if (dirty_cnt != atomic_long_read(&c->dirty_zn_cnt)) { -+ ubifs_err("incorrect dirty_zn_cnt %ld, calculated %ld", -+ atomic_long_read(&c->dirty_zn_cnt), -+ dirty_cnt); -+ return -EINVAL; -+ } -+ } -+ -+ return 0; -+} -+ -+/** -+ * dbg_walk_index - walk the on-flash index. 
-+ * @c: UBIFS file-system description object -+ * @leaf_cb: called for each leaf node -+ * @znode_cb: called for each indexing node -+ * @priv: private data which is passed to callbacks -+ * -+ * This function walks the UBIFS index and calls the @leaf_cb for each leaf -+ * node and @znode_cb for each indexing node. Returns zero in case of success -+ * and a negative error code in case of failure. -+ * -+ * It would be better if this function removed every znode it pulled to into -+ * the TNC, so that the behavior more closely matched the non-debugging -+ * behavior. -+ */ -+int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb, -+ dbg_znode_callback znode_cb, void *priv) -+{ -+ int err; -+ struct ubifs_zbranch *zbr; -+ struct ubifs_znode *znode, *child; -+ -+ mutex_lock(&c->tnc_mutex); -+ /* If the root indexing node is not in TNC - pull it */ -+ if (!c->zroot.znode) { -+ c->zroot.znode = ubifs_load_znode(c, &c->zroot, NULL, 0); -+ if (IS_ERR(c->zroot.znode)) { -+ err = PTR_ERR(c->zroot.znode); -+ c->zroot.znode = NULL; -+ goto out_unlock; -+ } -+ } -+ -+ /* -+ * We are going to traverse the indexing tree in the postorder manner. -+ * Go down and find the leftmost indexing node where we are going to -+ * start from. 
-+ */ -+ znode = c->zroot.znode; -+ while (znode->level > 0) { -+ zbr = &znode->zbranch[0]; -+ child = zbr->znode; -+ if (!child) { -+ child = ubifs_load_znode(c, zbr, znode, 0); -+ if (IS_ERR(child)) { -+ err = PTR_ERR(child); -+ goto out_unlock; -+ } -+ zbr->znode = child; -+ } -+ -+ znode = child; -+ } -+ -+ /* Iterate over all indexing nodes */ -+ while (1) { -+ int idx; -+ -+ cond_resched(); -+ -+ if (znode_cb) { -+ err = znode_cb(c, znode, priv); -+ if (err) { -+ ubifs_err("znode checking function returned " -+ "error %d", err); -+ dbg_dump_znode(c, znode); -+ goto out_dump; -+ } -+ } -+ if (leaf_cb && znode->level == 0) { -+ for (idx = 0; idx < znode->child_cnt; idx++) { -+ zbr = &znode->zbranch[idx]; -+ err = leaf_cb(c, zbr, priv); -+ if (err) { -+ ubifs_err("leaf checking function " -+ "returned error %d, for leaf " -+ "at LEB %d:%d", -+ err, zbr->lnum, zbr->offs); -+ goto out_dump; -+ } -+ } -+ } -+ -+ if (!znode->parent) -+ break; -+ -+ idx = znode->iip + 1; -+ znode = znode->parent; -+ if (idx < znode->child_cnt) { -+ /* Switch to the next index in the parent */ -+ zbr = &znode->zbranch[idx]; -+ child = zbr->znode; -+ if (!child) { -+ child = ubifs_load_znode(c, zbr, znode, idx); -+ if (IS_ERR(child)) { -+ err = PTR_ERR(child); -+ goto out_unlock; -+ } -+ zbr->znode = child; -+ } -+ znode = child; -+ } else -+ /* -+ * This is the last child, switch to the parent and -+ * continue. 
-+ */ -+ continue; -+ -+ /* Go to the lowest leftmost znode in the new sub-tree */ -+ while (znode->level > 0) { -+ zbr = &znode->zbranch[0]; -+ child = zbr->znode; -+ if (!child) { -+ child = ubifs_load_znode(c, zbr, znode, 0); -+ if (IS_ERR(child)) { -+ err = PTR_ERR(child); -+ goto out_unlock; -+ } -+ zbr->znode = child; -+ } -+ znode = child; -+ } -+ } -+ -+ mutex_unlock(&c->tnc_mutex); -+ return 0; -+ -+out_dump: -+ if (znode->parent) -+ zbr = &znode->parent->zbranch[znode->iip]; -+ else -+ zbr = &c->zroot; -+ ubifs_msg("dump of znode at LEB %d:%d", zbr->lnum, zbr->offs); -+ dbg_dump_znode(c, znode); -+out_unlock: -+ mutex_unlock(&c->tnc_mutex); -+ return err; -+} -+ -+/** -+ * add_size - add znode size to partially calculated index size. -+ * @c: UBIFS file-system description object -+ * @znode: znode to add size for -+ * @priv: partially calculated index size -+ * -+ * This is a helper function for 'dbg_check_idx_size()' which is called for -+ * every indexing node and adds its size to the 'long long' variable pointed to -+ * by @priv. -+ */ -+static int add_size(struct ubifs_info *c, struct ubifs_znode *znode, void *priv) -+{ -+ long long *idx_size = priv; -+ int add; -+ -+ add = ubifs_idx_node_sz(c, znode->child_cnt); -+ add = ALIGN(add, 8); -+ *idx_size += add; -+ return 0; -+} -+ -+/** -+ * dbg_check_idx_size - check index size. -+ * @c: UBIFS file-system description object -+ * @idx_size: size to check -+ * -+ * This function walks the UBIFS index, calculates its size and checks that the -+ * size is equivalent to @idx_size. Returns zero in case of success and a -+ * negative error code in case of failure. 
-+ */ -+int dbg_check_idx_size(struct ubifs_info *c, long long idx_size) -+{ -+ int err; -+ long long calc = 0; -+ -+ if (!(ubifs_chk_flags & UBIFS_CHK_IDX_SZ)) -+ return 0; -+ -+ err = dbg_walk_index(c, NULL, add_size, &calc); -+ if (err) { -+ ubifs_err("error %d while walking the index", err); -+ return err; -+ } -+ -+ if (calc != idx_size) { -+ ubifs_err("index size check failed: calculated size is %lld, " -+ "should be %lld", calc, idx_size); -+ dump_stack(); -+ return -EINVAL; -+ } -+ -+ return 0; -+} -+ -+/** -+ * struct fsck_inode - information about an inode used when checking the file-system. -+ * @rb: link in the RB-tree of inodes -+ * @inum: inode number -+ * @mode: inode type, permissions, etc -+ * @nlink: inode link count -+ * @xattr_cnt: count of extended attributes -+ * @references: how many directory/xattr entries refer this inode (calculated -+ * while walking the index) -+ * @calc_cnt: for directory inode count of child directories -+ * @size: inode size (read from on-flash inode) -+ * @xattr_sz: summary size of all extended attributes (read from on-flash -+ * inode) -+ * @calc_sz: for directories calculated directory size -+ * @calc_xcnt: count of extended attributes -+ * @calc_xsz: calculated summary size of all extended attributes -+ * @xattr_nms: sum of lengths of all extended attribute names belonging to this -+ * inode (read from on-flash inode) -+ * @calc_xnms: calculated sum of lengths of all extended attribute names -+ */ -+struct fsck_inode { -+ struct rb_node rb; -+ ino_t inum; -+ umode_t mode; -+ unsigned int nlink; -+ unsigned int xattr_cnt; -+ int references; -+ int calc_cnt; -+ long long size; -+ unsigned int xattr_sz; -+ long long calc_sz; -+ long long calc_xcnt; -+ long long calc_xsz; -+ unsigned int xattr_nms; -+ long long calc_xnms; -+}; -+ -+/** -+ * struct fsck_data - private FS checking information. 
-+ * @inodes: RB-tree of all inodes (contains @struct fsck_inode objects) -+ */ -+struct fsck_data { -+ struct rb_root inodes; -+}; -+ -+/** -+ * add_inode - add inode information to RB-tree of inodes. -+ * @c: UBIFS file-system description object -+ * @fsckd: FS checking information -+ * @ino: raw UBIFS inode to add -+ * -+ * This is a helper function for 'check_leaf()' which adds information about -+ * inode @ino to the RB-tree of inodes. Returns inode information pointer in -+ * case of success and a negative error code in case of failure. -+ */ -+static struct fsck_inode *add_inode(struct ubifs_info *c, -+ struct fsck_data *fsckd, -+ struct ubifs_ino_node *ino) -+{ -+ struct rb_node **p, *parent = NULL; -+ struct fsck_inode *fscki; -+ ino_t inum = key_inum_flash(c, &ino->key); -+ -+ p = &fsckd->inodes.rb_node; -+ while (*p) { -+ parent = *p; -+ fscki = rb_entry(parent, struct fsck_inode, rb); -+ if (inum < fscki->inum) -+ p = &(*p)->rb_left; -+ else if (inum > fscki->inum) -+ p = &(*p)->rb_right; -+ else -+ return fscki; -+ } -+ -+ if (inum > c->highest_inum) { -+ ubifs_err("too high inode number, max. is %lu", -+ (unsigned long)c->highest_inum); -+ return ERR_PTR(-EINVAL); -+ } -+ -+ fscki = kzalloc(sizeof(struct fsck_inode), GFP_NOFS); -+ if (!fscki) -+ return ERR_PTR(-ENOMEM); -+ -+ fscki->inum = inum; -+ fscki->nlink = le32_to_cpu(ino->nlink); -+ fscki->size = le64_to_cpu(ino->size); -+ fscki->xattr_cnt = le32_to_cpu(ino->xattr_cnt); -+ fscki->xattr_sz = le32_to_cpu(ino->xattr_size); -+ fscki->xattr_nms = le32_to_cpu(ino->xattr_names); -+ fscki->mode = le32_to_cpu(ino->mode); -+ if (S_ISDIR(fscki->mode)) { -+ fscki->calc_sz = UBIFS_INO_NODE_SZ; -+ fscki->calc_cnt = 2; -+ } -+ rb_link_node(&fscki->rb, parent, p); -+ rb_insert_color(&fscki->rb, &fsckd->inodes); -+ return fscki; -+} -+ -+/** -+ * search_inode - search inode in the RB-tree of inodes. 
-+ * @fsckd: FS checking information -+ * @inum: inode number to search -+ * -+ * This is a helper function for 'check_leaf()' which searches inode @inum in -+ * the RB-tree of inodes and returns an inode information pointer or %NULL if -+ * the inode was not found. -+ */ -+static struct fsck_inode *search_inode(struct fsck_data *fsckd, ino_t inum) -+{ -+ struct rb_node *p; -+ struct fsck_inode *fscki; -+ -+ p = fsckd->inodes.rb_node; -+ while (p) { -+ fscki = rb_entry(p, struct fsck_inode, rb); -+ if (inum < fscki->inum) -+ p = p->rb_left; -+ else if (inum > fscki->inum) -+ p = p->rb_right; -+ else -+ return fscki; -+ } -+ return NULL; -+} -+ -+/** -+ * read_add_inode - read inode node and add it to RB-tree of inodes. -+ * @c: UBIFS file-system description object -+ * @fsckd: FS checking information -+ * @inum: inode number to read -+ * -+ * This is a helper function for 'check_leaf()' which finds inode node @inum in -+ * the index, reads it, and adds it to the RB-tree of inodes. Returns inode -+ * information pointer in case of success and a negative error code in case of -+ * failure. 
-+ */ -+static struct fsck_inode *read_add_inode(struct ubifs_info *c, -+ struct fsck_data *fsckd, ino_t inum) -+{ -+ int n, err; -+ union ubifs_key key; -+ struct ubifs_znode *znode; -+ struct ubifs_zbranch *zbr; -+ struct ubifs_ino_node *ino; -+ struct fsck_inode *fscki; -+ -+ fscki = search_inode(fsckd, inum); -+ if (fscki) -+ return fscki; -+ -+ ino_key_init(c, &key, inum); -+ err = ubifs_lookup_level0(c, &key, &znode, &n); -+ if (!err) { -+ ubifs_err("inode %lu not found in index", (unsigned long)inum); -+ return ERR_PTR(-ENOENT); -+ } else if (err < 0) { -+ ubifs_err("error %d while looking up inode %lu", -+ err, (unsigned long)inum); -+ return ERR_PTR(err); -+ } -+ -+ zbr = &znode->zbranch[n]; -+ if (zbr->len < UBIFS_INO_NODE_SZ) { -+ ubifs_err("bad node %lu node length %d", -+ (unsigned long)inum, zbr->len); -+ return ERR_PTR(-EINVAL); -+ } -+ -+ ino = kmalloc(zbr->len, GFP_NOFS); -+ if (!ino) -+ return ERR_PTR(-ENOMEM); -+ -+ err = ubifs_tnc_read_node(c, zbr, ino); -+ if (err) { -+ ubifs_err("cannot read inode node at LEB %d:%d, error %d", -+ zbr->lnum, zbr->offs, err); -+ kfree(ino); -+ return ERR_PTR(err); -+ } -+ -+ fscki = add_inode(c, fsckd, ino); -+ kfree(ino); -+ if (IS_ERR(fscki)) { -+ ubifs_err("error %ld while adding inode %lu node", -+ PTR_ERR(fscki), (unsigned long)inum); -+ return fscki; -+ } -+ -+ return fscki; -+} -+ -+/** -+ * check_leaf - check leaf node. -+ * @c: UBIFS file-system description object -+ * @zbr: zbranch of the leaf node to check -+ * @priv: FS checking information -+ * -+ * This is a helper function for 'dbg_check_filesystem()' which is called for -+ * every single leaf node while walking the indexing tree. It checks that the -+ * leaf node referred from the indexing tree exists, has correct CRC, and does -+ * some other basic validation. This function is also responsible for building -+ * an RB-tree of inodes - it adds all inodes into the RB-tree. 
It also -+ * calculates reference count, size, etc for each inode in order to later -+ * compare them to the information stored inside the inodes and detect possible -+ * inconsistencies. Returns zero in case of success and a negative error code -+ * in case of failure. -+ */ -+static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr, -+ void *priv) -+{ -+ ino_t inum; -+ void *node; -+ int err, type = key_type(c, &zbr->key); -+ struct fsck_inode *fscki; -+ -+ if (zbr->len < UBIFS_CH_SZ) { -+ ubifs_err("bad leaf length %d (LEB %d:%d)", -+ zbr->len, zbr->lnum, zbr->offs); -+ return -EINVAL; -+ } -+ -+ node = kmalloc(zbr->len, GFP_NOFS); -+ if (!node) -+ return -ENOMEM; -+ -+ err = ubifs_tnc_read_node(c, zbr, node); -+ if (err) { -+ ubifs_err("cannot read leaf node at LEB %d:%d, error %d", -+ zbr->lnum, zbr->offs, err); -+ goto out_free; -+ } -+ -+ /* If this is an inode node, add it to RB-tree of inodes */ -+ if (type == UBIFS_INO_KEY) { -+ fscki = add_inode(c, priv, node); -+ if (IS_ERR(fscki)) { -+ err = PTR_ERR(fscki); -+ ubifs_err("error %d while adding inode node", err); -+ goto out_dump; -+ } -+ goto out; -+ } -+ -+ if (type != UBIFS_DENT_KEY && type != UBIFS_XENT_KEY && -+ type != UBIFS_DATA_KEY) { -+ ubifs_err("unexpected node type %d at LEB %d:%d", -+ type, zbr->lnum, zbr->offs); -+ err = -EINVAL; -+ goto out_free; -+ } -+ -+ if (type == UBIFS_DATA_KEY) { -+ long long blk_offs; -+ struct ubifs_data_node *dn = node; -+ -+ /* -+ * Search the inode node this data node belongs to and insert -+ * it to the RB-tree of inodes. 
-+ */ -+ inum = key_inum_flash(c, &dn->key); -+ fscki = read_add_inode(c, priv, inum); -+ if (IS_ERR(fscki)) { -+ err = PTR_ERR(fscki); -+ ubifs_err("error %d while processing data node and " -+ "trying to find inode node %lu", -+ err, (unsigned long)inum); -+ goto out_dump; -+ } -+ -+ /* Make sure the data node is within inode size */ -+ blk_offs = key_block_flash(c, &dn->key); -+ blk_offs <<= UBIFS_BLOCK_SHIFT; -+ blk_offs += le32_to_cpu(dn->size); -+ if (blk_offs > fscki->size) { -+ ubifs_err("data node at LEB %d:%d is not within inode " -+ "size %lld", zbr->lnum, zbr->offs, -+ fscki->size); -+ err = -EINVAL; -+ goto out_dump; -+ } -+ } else { -+ int nlen; -+ struct ubifs_dent_node *dent = node; -+ struct fsck_inode *fscki1; -+ -+ err = ubifs_validate_entry(c, dent); -+ if (err) -+ goto out_dump; -+ -+ /* -+ * Search the inode node this entry refers to and the parent -+ * inode node and insert them to the RB-tree of inodes. -+ */ -+ inum = le64_to_cpu(dent->inum); -+ fscki = read_add_inode(c, priv, inum); -+ if (IS_ERR(fscki)) { -+ err = PTR_ERR(fscki); -+ ubifs_err("error %d while processing entry node and " -+ "trying to find inode node %lu", -+ err, (unsigned long)inum); -+ goto out_dump; -+ } -+ -+ /* Count how many direntries or xentries refers this inode */ -+ fscki->references += 1; -+ -+ inum = key_inum_flash(c, &dent->key); -+ fscki1 = read_add_inode(c, priv, inum); -+ if (IS_ERR(fscki1)) { -+ err = PTR_ERR(fscki); -+ ubifs_err("error %d while processing entry node and " -+ "trying to find parent inode node %lu", -+ err, (unsigned long)inum); -+ goto out_dump; -+ } -+ -+ nlen = le16_to_cpu(dent->nlen); -+ if (type == UBIFS_XENT_KEY) { -+ fscki1->calc_xcnt += 1; -+ fscki1->calc_xsz += CALC_DENT_SIZE(nlen); -+ fscki1->calc_xsz += CALC_XATTR_BYTES(fscki->size); -+ fscki1->calc_xnms += nlen; -+ } else { -+ fscki1->calc_sz += CALC_DENT_SIZE(nlen); -+ if (dent->type == UBIFS_ITYPE_DIR) -+ fscki1->calc_cnt += 1; -+ } -+ } -+ -+out: -+ kfree(node); -+ return 0; 
-+ -+out_dump: -+ ubifs_msg("dump of node at LEB %d:%d", zbr->lnum, zbr->offs); -+ dbg_dump_node(c, node); -+out_free: -+ kfree(node); -+ return err; -+} -+ -+/** -+ * free_inodes - free RB-tree of inodes. -+ * @fsckd: FS checking information -+ */ -+static void free_inodes(struct fsck_data *fsckd) -+{ -+ struct rb_node *this = fsckd->inodes.rb_node; -+ struct fsck_inode *fscki; -+ -+ while (this) { -+ if (this->rb_left) -+ this = this->rb_left; -+ else if (this->rb_right) -+ this = this->rb_right; -+ else { -+ fscki = rb_entry(this, struct fsck_inode, rb); -+ this = rb_parent(this); -+ if (this) { -+ if (this->rb_left == &fscki->rb) -+ this->rb_left = NULL; -+ else -+ this->rb_right = NULL; -+ } -+ kfree(fscki); -+ } -+ } -+} -+ -+/** -+ * check_inodes - checks all inodes. -+ * @c: UBIFS file-system description object -+ * @fsckd: FS checking information -+ * -+ * This is a helper function for 'dbg_check_filesystem()' which walks the -+ * RB-tree of inodes after the index scan has been finished, and checks that -+ * inode nlink, size, etc are correct. Returns zero if inodes are fine, -+ * %-EINVAL if not, and a negative error code in case of failure. -+ */ -+static int check_inodes(struct ubifs_info *c, struct fsck_data *fsckd) -+{ -+ int n, err; -+ union ubifs_key key; -+ struct ubifs_znode *znode; -+ struct ubifs_zbranch *zbr; -+ struct ubifs_ino_node *ino; -+ struct fsck_inode *fscki; -+ struct rb_node *this = rb_first(&fsckd->inodes); -+ -+ while (this) { -+ fscki = rb_entry(this, struct fsck_inode, rb); -+ this = rb_next(this); -+ -+ if (S_ISDIR(fscki->mode)) { -+ /* -+ * Directories have to have exactly one reference (they -+ * cannot have hardlinks), although root inode is an -+ * exception. 
-+ */ -+ if (fscki->inum != UBIFS_ROOT_INO && -+ fscki->references != 1) { -+ ubifs_err("directory inode %lu has %d " -+ "direntries which refer it, but " -+ "should be 1", -+ (unsigned long)fscki->inum, -+ fscki->references); -+ goto out_dump; -+ } -+ if (fscki->inum == UBIFS_ROOT_INO && -+ fscki->references != 0) { -+ ubifs_err("root inode %lu has non-zero (%d) " -+ "direntries which refer it", -+ (unsigned long)fscki->inum, -+ fscki->references); -+ goto out_dump; -+ } -+ if (fscki->calc_sz != fscki->size) { -+ ubifs_err("directory inode %lu size is %lld, " -+ "but calculated size is %lld", -+ (unsigned long)fscki->inum, -+ fscki->size, fscki->calc_sz); -+ goto out_dump; -+ } -+ if (fscki->calc_cnt != fscki->nlink) { -+ ubifs_err("directory inode %lu nlink is %d, " -+ "but calculated nlink is %d", -+ (unsigned long)fscki->inum, -+ fscki->nlink, fscki->calc_cnt); -+ goto out_dump; -+ } -+ } else { -+ if (fscki->references != fscki->nlink) { -+ ubifs_err("inode %lu nlink is %d, but " -+ "calculated nlink is %d", -+ (unsigned long)fscki->inum, -+ fscki->nlink, fscki->references); -+ goto out_dump; -+ } -+ } -+ if (fscki->xattr_sz != fscki->calc_xsz) { -+ ubifs_err("inode %lu has xattr size %u, but " -+ "calculated size is %lld", -+ (unsigned long)fscki->inum, fscki->xattr_sz, -+ fscki->calc_xsz); -+ goto out_dump; -+ } -+ if (fscki->xattr_cnt != fscki->calc_xcnt) { -+ ubifs_err("inode %lu has %u xattrs, but " -+ "calculated count is %lld", -+ (unsigned long)fscki->inum, -+ fscki->xattr_cnt, fscki->calc_xcnt); -+ goto out_dump; -+ } -+ if (fscki->xattr_nms != fscki->calc_xnms) { -+ ubifs_err("inode %lu has xattr names' size %u, but " -+ "calculated names' size is %lld", -+ (unsigned long)fscki->inum, fscki->xattr_nms, -+ fscki->calc_xnms); -+ goto out_dump; -+ } -+ } -+ -+ return 0; -+ -+out_dump: -+ /* Read the bad inode and dump it */ -+ ino_key_init(c, &key, fscki->inum); -+ err = ubifs_lookup_level0(c, &key, &znode, &n); -+ if (!err) { -+ ubifs_err("inode %lu 
not found in index", -+ (unsigned long)fscki->inum); -+ return -ENOENT; -+ } else if (err < 0) { -+ ubifs_err("error %d while looking up inode %lu", -+ err, (unsigned long)fscki->inum); -+ return err; -+ } -+ -+ zbr = &znode->zbranch[n]; -+ ino = kmalloc(zbr->len, GFP_NOFS); -+ if (!ino) -+ return -ENOMEM; -+ -+ err = ubifs_tnc_read_node(c, zbr, ino); -+ if (err) { -+ ubifs_err("cannot read inode node at LEB %d:%d, error %d", -+ zbr->lnum, zbr->offs, err); -+ kfree(ino); -+ return err; -+ } -+ -+ ubifs_msg("dump of the inode %lu sitting in LEB %d:%d", -+ (unsigned long)fscki->inum, zbr->lnum, zbr->offs); -+ dbg_dump_node(c, ino); -+ kfree(ino); -+ return -EINVAL; -+} -+ -+/** -+ * dbg_check_filesystem - check the file-system. -+ * @c: UBIFS file-system description object -+ * -+ * This function checks the file system, namely: -+ * o makes sure that all leaf nodes exist and their CRCs are correct; -+ * o makes sure inode nlink, size, xattr size/count are correct (for all -+ * inodes). -+ * -+ * The function reads whole indexing tree and all nodes, so it is pretty -+ * heavy-weight. Returns zero if the file-system is consistent, %-EINVAL if -+ * not, and a negative error code in case of failure. 
-+ */ -+int dbg_check_filesystem(struct ubifs_info *c) -+{ -+ int err; -+ struct fsck_data fsckd; -+ -+ if (!(ubifs_chk_flags & UBIFS_CHK_FS)) -+ return 0; -+ -+ fsckd.inodes = RB_ROOT; -+ err = dbg_walk_index(c, check_leaf, NULL, &fsckd); -+ if (err) -+ goto out_free; -+ -+ err = check_inodes(c, &fsckd); -+ if (err) -+ goto out_free; -+ -+ free_inodes(&fsckd); -+ return 0; -+ -+out_free: -+ ubifs_err("file-system check failed with error %d", err); -+ dump_stack(); -+ free_inodes(&fsckd); -+ return err; -+} -+ -+static int invocation_cnt; -+ -+int dbg_force_in_the_gaps(void) -+{ -+ if (!dbg_force_in_the_gaps_enabled) -+ return 0; -+ /* Force in-the-gaps every 8th commit */ -+ return !((invocation_cnt++) & 0x7); -+} -+ -+/* Failure mode for recovery testing */ -+ -+#define chance(n, d) (simple_rand() <= (n) * 32768LL / (d)) -+ -+struct failure_mode_info { -+ struct list_head list; -+ struct ubifs_info *c; -+}; -+ -+static LIST_HEAD(fmi_list); -+static DEFINE_SPINLOCK(fmi_lock); -+ -+static unsigned int next; -+ -+static int simple_rand(void) -+{ -+ if (next == 0) -+ next = current->pid; -+ next = next * 1103515245 + 12345; -+ return (next >> 16) & 32767; -+} -+ -+static void failure_mode_init(struct ubifs_info *c) -+{ -+ struct failure_mode_info *fmi; -+ -+ fmi = kmalloc(sizeof(struct failure_mode_info), GFP_NOFS); -+ if (!fmi) { -+ ubifs_err("Failed to register failure mode - no memory"); -+ return; -+ } -+ fmi->c = c; -+ spin_lock(&fmi_lock); -+ list_add_tail(&fmi->list, &fmi_list); -+ spin_unlock(&fmi_lock); -+} -+ -+static void failure_mode_exit(struct ubifs_info *c) -+{ -+ struct failure_mode_info *fmi, *tmp; -+ -+ spin_lock(&fmi_lock); -+ list_for_each_entry_safe(fmi, tmp, &fmi_list, list) -+ if (fmi->c == c) { -+ list_del(&fmi->list); -+ kfree(fmi); -+ } -+ spin_unlock(&fmi_lock); -+} -+ -+static struct ubifs_info *dbg_find_info(struct ubi_volume_desc *desc) -+{ -+ struct failure_mode_info *fmi; -+ -+ spin_lock(&fmi_lock); -+ list_for_each_entry(fmi, 
&fmi_list, list) -+ if (fmi->c->ubi == desc) { -+ struct ubifs_info *c = fmi->c; -+ -+ spin_unlock(&fmi_lock); -+ return c; -+ } -+ spin_unlock(&fmi_lock); -+ return NULL; -+} -+ -+static int in_failure_mode(struct ubi_volume_desc *desc) -+{ -+ struct ubifs_info *c = dbg_find_info(desc); -+ -+ if (c && dbg_failure_mode) -+ return c->dbg->failure_mode; -+ return 0; -+} -+ -+static int do_fail(struct ubi_volume_desc *desc, int lnum, int write) -+{ -+ struct ubifs_info *c = dbg_find_info(desc); -+ struct ubifs_debug_info *d; -+ -+ if (!c || !dbg_failure_mode) -+ return 0; -+ d = c->dbg; -+ if (d->failure_mode) -+ return 1; -+ if (!d->fail_cnt) { -+ /* First call - decide delay to failure */ -+ if (chance(1, 2)) { -+ unsigned int delay = 1 << (simple_rand() >> 11); -+ -+ if (chance(1, 2)) { -+ d->fail_delay = 1; -+ d->fail_timeout = jiffies + -+ msecs_to_jiffies(delay); -+ dbg_rcvry("failing after %ums", delay); -+ } else { -+ d->fail_delay = 2; -+ d->fail_cnt_max = delay; -+ dbg_rcvry("failing after %u calls", delay); -+ } -+ } -+ d->fail_cnt += 1; -+ } -+ /* Determine if failure delay has expired */ -+ if (d->fail_delay == 1) { -+ if (time_before(jiffies, d->fail_timeout)) -+ return 0; -+ } else if (d->fail_delay == 2) -+ if (d->fail_cnt++ < d->fail_cnt_max) -+ return 0; -+ if (lnum == UBIFS_SB_LNUM) { -+ if (write) { -+ if (chance(1, 2)) -+ return 0; -+ } else if (chance(19, 20)) -+ return 0; -+ dbg_rcvry("failing in super block LEB %d", lnum); -+ } else if (lnum == UBIFS_MST_LNUM || lnum == UBIFS_MST_LNUM + 1) { -+ if (chance(19, 20)) -+ return 0; -+ dbg_rcvry("failing in master LEB %d", lnum); -+ } else if (lnum >= UBIFS_LOG_LNUM && lnum <= c->log_last) { -+ if (write) { -+ if (chance(99, 100)) -+ return 0; -+ } else if (chance(399, 400)) -+ return 0; -+ dbg_rcvry("failing in log LEB %d", lnum); -+ } else if (lnum >= c->lpt_first && lnum <= c->lpt_last) { -+ if (write) { -+ if (chance(7, 8)) -+ return 0; -+ } else if (chance(19, 20)) -+ return 0; -+ 
dbg_rcvry("failing in LPT LEB %d", lnum); -+ } else if (lnum >= c->orph_first && lnum <= c->orph_last) { -+ if (write) { -+ if (chance(1, 2)) -+ return 0; -+ } else if (chance(9, 10)) -+ return 0; -+ dbg_rcvry("failing in orphan LEB %d", lnum); -+ } else if (lnum == c->ihead_lnum) { -+ if (chance(99, 100)) -+ return 0; -+ dbg_rcvry("failing in index head LEB %d", lnum); -+ } else if (c->jheads && lnum == c->jheads[GCHD].wbuf.lnum) { -+ if (chance(9, 10)) -+ return 0; -+ dbg_rcvry("failing in GC head LEB %d", lnum); -+ } else if (write && !RB_EMPTY_ROOT(&c->buds) && -+ !ubifs_search_bud(c, lnum)) { -+ if (chance(19, 20)) -+ return 0; -+ dbg_rcvry("failing in non-bud LEB %d", lnum); -+ } else if (c->cmt_state == COMMIT_RUNNING_BACKGROUND || -+ c->cmt_state == COMMIT_RUNNING_REQUIRED) { -+ if (chance(999, 1000)) -+ return 0; -+ dbg_rcvry("failing in bud LEB %d commit running", lnum); -+ } else { -+ if (chance(9999, 10000)) -+ return 0; -+ dbg_rcvry("failing in bud LEB %d commit not running", lnum); -+ } -+ ubifs_err("*** SETTING FAILURE MODE ON (LEB %d) ***", lnum); -+ d->failure_mode = 1; -+ dump_stack(); -+ return 1; -+} -+ -+static void cut_data(const void *buf, int len) -+{ -+ int flen, i; -+ unsigned char *p = (void *)buf; -+ -+ flen = (len * (long long)simple_rand()) >> 15; -+ for (i = flen; i < len; i++) -+ p[i] = 0xff; -+} -+ -+int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, -+ int len, int check) -+{ -+ if (in_failure_mode(desc)) -+ return -EIO; -+ return ubi_leb_read(desc, lnum, buf, offset, len, check); -+} -+ -+int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf, -+ int offset, int len, int dtype) -+{ -+ int err, failing; -+ -+ if (in_failure_mode(desc)) -+ return -EIO; -+ failing = do_fail(desc, lnum, 1); -+ if (failing) -+ cut_data(buf, len); -+ err = ubi_leb_write(desc, lnum, buf, offset, len, dtype); -+ if (err) -+ return err; -+ if (failing) -+ return -EIO; -+ return 0; -+} -+ -+int 
dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf, -+ int len, int dtype) -+{ -+ int err; -+ -+ if (do_fail(desc, lnum, 1)) -+ return -EIO; -+ err = ubi_leb_change(desc, lnum, buf, len, dtype); -+ if (err) -+ return err; -+ if (do_fail(desc, lnum, 1)) -+ return -EIO; -+ return 0; -+} -+ -+int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum) -+{ -+ int err; -+ -+ if (do_fail(desc, lnum, 0)) -+ return -EIO; -+ err = ubi_leb_erase(desc, lnum); -+ if (err) -+ return err; -+ if (do_fail(desc, lnum, 0)) -+ return -EIO; -+ return 0; -+} -+ -+int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum) -+{ -+ int err; -+ -+ if (do_fail(desc, lnum, 0)) -+ return -EIO; -+ err = ubi_leb_unmap(desc, lnum); -+ if (err) -+ return err; -+ if (do_fail(desc, lnum, 0)) -+ return -EIO; -+ return 0; -+} -+ -+int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum) -+{ -+ if (in_failure_mode(desc)) -+ return -EIO; -+ return ubi_is_mapped(desc, lnum); -+} -+ -+int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype) -+{ -+ int err; -+ -+ if (do_fail(desc, lnum, 0)) -+ return -EIO; -+ err = ubi_leb_map(desc, lnum, dtype); -+ if (err) -+ return err; -+ if (do_fail(desc, lnum, 0)) -+ return -EIO; -+ return 0; -+} -+ -+/** -+ * ubifs_debugging_init - initialize UBIFS debugging. -+ * @c: UBIFS file-system description object -+ * -+ * This function initializes debugging-related data for the file system. -+ * Returns zero in case of success and a negative error code in case of -+ * failure. -+ */ -+int ubifs_debugging_init(struct ubifs_info *c) -+{ -+ c->dbg = kzalloc(sizeof(struct ubifs_debug_info), GFP_KERNEL); -+ if (!c->dbg) -+ return -ENOMEM; -+ -+ c->dbg->buf = vmalloc(c->leb_size); -+ if (!c->dbg->buf) -+ goto out; -+ -+ failure_mode_init(c); -+ return 0; -+ -+out: -+ kfree(c->dbg); -+ return -ENOMEM; -+} -+ -+/** -+ * ubifs_debugging_exit - free debugging data. 
-+ * @c: UBIFS file-system description object -+ */ -+void ubifs_debugging_exit(struct ubifs_info *c) -+{ -+ failure_mode_exit(c); -+ vfree(c->dbg->buf); -+ kfree(c->dbg); -+} -+ -+/* -+ * Root directory for UBIFS stuff in debugfs. Contains sub-directories which -+ * contain the stuff specific to particular file-system mounts. -+ */ -+static struct dentry *dfs_rootdir; -+ -+/** -+ * dbg_debugfs_init - initialize debugfs file-system. -+ * -+ * UBIFS uses debugfs file-system to expose various debugging knobs to -+ * user-space. This function creates "ubifs" directory in the debugfs -+ * file-system. Returns zero in case of success and a negative error code in -+ * case of failure. -+ */ -+int dbg_debugfs_init(void) -+{ -+ dfs_rootdir = debugfs_create_dir("ubifs", NULL); -+ if (IS_ERR(dfs_rootdir)) { -+ int err = PTR_ERR(dfs_rootdir); -+ ubifs_err("cannot create \"ubifs\" debugfs directory, " -+ "error %d\n", err); -+ return err; -+ } -+ -+ return 0; -+} -+ -+/** -+ * dbg_debugfs_exit - remove the "ubifs" directory from debugfs file-system. 
-+ */ -+void dbg_debugfs_exit(void) -+{ -+ debugfs_remove(dfs_rootdir); -+} -+ -+static int open_debugfs_file(struct inode *inode, struct file *file) -+{ -+ file->private_data = inode->i_private; -+ return 0; -+} -+ -+static ssize_t write_debugfs_file(struct file *file, const char __user *buf, -+ size_t count, loff_t *ppos) -+{ -+ struct ubifs_info *c = file->private_data; -+ struct ubifs_debug_info *d = c->dbg; -+ -+ if (file->f_path.dentry == d->dfs_dump_lprops) -+ dbg_dump_lprops(c); -+ else if (file->f_path.dentry == d->dfs_dump_budg) { -+ spin_lock(&c->space_lock); -+ dbg_dump_budg(c); -+ spin_unlock(&c->space_lock); -+ } else if (file->f_path.dentry == d->dfs_dump_tnc) { -+ mutex_lock(&c->tnc_mutex); -+ dbg_dump_tnc(c); -+ mutex_unlock(&c->tnc_mutex); -+ } else -+ return -EINVAL; -+ -+ *ppos += count; -+ return count; -+} -+ -+static const struct file_operations dfs_fops = { -+ .open = open_debugfs_file, -+ .write = write_debugfs_file, -+ .owner = THIS_MODULE, -+}; -+ -+/** -+ * dbg_debugfs_init_fs - initialize debugfs for UBIFS instance. -+ * @c: UBIFS file-system description object -+ * -+ * This function creates all debugfs files for this instance of UBIFS. Returns -+ * zero in case of success and a negative error code in case of failure. -+ * -+ * Note, the only reason we have not merged this function with the -+ * 'ubifs_debugging_init()' function is because it is better to initialize -+ * debugfs interfaces at the very end of the mount process, and remove them at -+ * the very beginning of the mount process. 
-+ */ -+int dbg_debugfs_init_fs(struct ubifs_info *c) -+{ -+ int err; -+ const char *fname; -+ struct dentry *dent; -+ struct ubifs_debug_info *d = c->dbg; -+ -+ sprintf(d->dfs_dir_name, "ubi%d_%d", c->vi.ubi_num, c->vi.vol_id); -+ d->dfs_dir = debugfs_create_dir(d->dfs_dir_name, dfs_rootdir); -+ if (IS_ERR(d->dfs_dir)) { -+ err = PTR_ERR(d->dfs_dir); -+ ubifs_err("cannot create \"%s\" debugfs directory, error %d\n", -+ d->dfs_dir_name, err); -+ goto out; -+ } -+ -+ fname = "dump_lprops"; -+ dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops); -+ if (IS_ERR(dent)) -+ goto out_remove; -+ d->dfs_dump_lprops = dent; -+ -+ fname = "dump_budg"; -+ dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops); -+ if (IS_ERR(dent)) -+ goto out_remove; -+ d->dfs_dump_budg = dent; -+ -+ fname = "dump_tnc"; -+ dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops); -+ if (IS_ERR(dent)) -+ goto out_remove; -+ d->dfs_dump_tnc = dent; -+ -+ return 0; -+ -+out_remove: -+ err = PTR_ERR(dent); -+ ubifs_err("cannot create \"%s\" debugfs directory, error %d\n", -+ fname, err); -+ if (d->dfs_dump_tnc) -+ debugfs_remove(d->dfs_dump_tnc); -+ if (d->dfs_dump_budg) -+ debugfs_remove(d->dfs_dump_budg); -+ if (d->dfs_dump_lprops) -+ debugfs_remove(d->dfs_dump_lprops); -+ debugfs_remove(d->dfs_dir); -+out: -+ return err; -+} -+ -+/** -+ * dbg_debugfs_exit_fs - remove all debugfs files. 
-+ * @c: UBIFS file-system description object -+ */ -+void dbg_debugfs_exit_fs(struct ubifs_info *c) -+{ -+ struct ubifs_debug_info *d = c->dbg; -+ -+ debugfs_remove(d->dfs_dump_tnc); -+ debugfs_remove(d->dfs_dump_budg); -+ debugfs_remove(d->dfs_dump_lprops); -+ debugfs_remove(d->dfs_dir); -+} -+ -+#endif /* CONFIG_UBIFS_FS_DEBUG */ -diff -Nurd linux-2.6.24.orig/fs/ubifs/debug.h linux-2.6.24/fs/ubifs/debug.h ---- linux-2.6.24.orig/fs/ubifs/debug.h 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.24/fs/ubifs/debug.h 2009-04-17 09:49:28.000000000 +0200 -@@ -0,0 +1,486 @@ -+/* -+ * This file is part of UBIFS. -+ * -+ * Copyright (C) 2006-2008 Nokia Corporation. -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 as published by -+ * the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -+ * more details. -+ * -+ * You should have received a copy of the GNU General Public License along with -+ * this program; if not, write to the Free Software Foundation, Inc., 51 -+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ * -+ * Authors: Artem Bityutskiy (Битюцкий Артём) -+ * Adrian Hunter -+ */ -+ -+#ifndef __UBIFS_DEBUG_H__ -+#define __UBIFS_DEBUG_H__ -+ -+#ifdef CONFIG_UBIFS_FS_DEBUG -+ -+/** -+ * ubifs_debug_info - per-FS debugging information. 
-+ * @buf: a buffer of LEB size, used for various purposes -+ * @old_zroot: old index root - used by 'dbg_check_old_index()' -+ * @old_zroot_level: old index root level - used by 'dbg_check_old_index()' -+ * @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()' -+ * @failure_mode: failure mode for recovery testing -+ * @fail_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls -+ * @fail_timeout: time in jiffies when delay of failure mode expires -+ * @fail_cnt: current number of calls to failure mode I/O functions -+ * @fail_cnt_max: number of calls by which to delay failure mode -+ * @chk_lpt_sz: used by LPT tree size checker -+ * @chk_lpt_sz2: used by LPT tree size checker -+ * @chk_lpt_wastage: used by LPT tree size checker -+ * @chk_lpt_lebs: used by LPT tree size checker -+ * @new_nhead_offs: used by LPT tree size checker -+ * @new_ihead_lnum: used by debugging to check @c->ihead_lnum -+ * @new_ihead_offs: used by debugging to check @c->ihead_offs -+ * -+ * @saved_lst: saved lprops statistics (used by 'dbg_save_space_info()') -+ * @saved_free: saved free space (used by 'dbg_save_space_info()') -+ * -+ * dfs_dir_name: name of debugfs directory containing this file-system's files -+ * dfs_dir: direntry object of the file-system debugfs directory -+ * dfs_dump_lprops: "dump lprops" debugfs knob -+ * dfs_dump_budg: "dump budgeting information" debugfs knob -+ * dfs_dump_tnc: "dump TNC" debugfs knob -+ */ -+struct ubifs_debug_info { -+ void *buf; -+ struct ubifs_zbranch old_zroot; -+ int old_zroot_level; -+ unsigned long long old_zroot_sqnum; -+ int failure_mode; -+ int fail_delay; -+ unsigned long fail_timeout; -+ unsigned int fail_cnt; -+ unsigned int fail_cnt_max; -+ long long chk_lpt_sz; -+ long long chk_lpt_sz2; -+ long long chk_lpt_wastage; -+ int chk_lpt_lebs; -+ int new_nhead_offs; -+ int new_ihead_lnum; -+ int new_ihead_offs; -+ -+ struct ubifs_lp_stats saved_lst; -+ long long saved_free; -+ -+ char dfs_dir_name[100]; -+ 
struct dentry *dfs_dir; -+ struct dentry *dfs_dump_lprops; -+ struct dentry *dfs_dump_budg; -+ struct dentry *dfs_dump_tnc; -+}; -+ -+#define ubifs_assert(expr) do { \ -+ if (unlikely(!(expr))) { \ -+ printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \ -+ __func__, __LINE__, current->pid); \ -+ dbg_dump_stack(); \ -+ } \ -+} while (0) -+ -+#define ubifs_assert_cmt_locked(c) do { \ -+ if (unlikely(down_write_trylock(&(c)->commit_sem))) { \ -+ up_write(&(c)->commit_sem); \ -+ printk(KERN_CRIT "commit lock is not locked!\n"); \ -+ ubifs_assert(0); \ -+ } \ -+} while (0) -+ -+#define dbg_dump_stack() do { \ -+ if (!dbg_failure_mode) \ -+ dump_stack(); \ -+} while (0) -+ -+/* Generic debugging messages */ -+#define dbg_msg(fmt, ...) do { \ -+ spin_lock(&dbg_lock); \ -+ printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", current->pid, \ -+ __func__, ##__VA_ARGS__); \ -+ spin_unlock(&dbg_lock); \ -+} while (0) -+ -+#define dbg_do_msg(typ, fmt, ...) do { \ -+ if (ubifs_msg_flags & typ) \ -+ dbg_msg(fmt, ##__VA_ARGS__); \ -+} while (0) -+ -+#define dbg_err(fmt, ...) do { \ -+ spin_lock(&dbg_lock); \ -+ ubifs_err(fmt, ##__VA_ARGS__); \ -+ spin_unlock(&dbg_lock); \ -+} while (0) -+ -+const char *dbg_key_str0(const struct ubifs_info *c, -+ const union ubifs_key *key); -+const char *dbg_key_str1(const struct ubifs_info *c, -+ const union ubifs_key *key); -+ -+/* -+ * DBGKEY macros require @dbg_lock to be held, which it is in the dbg message -+ * macros. -+ */ -+#define DBGKEY(key) dbg_key_str0(c, (key)) -+#define DBGKEY1(key) dbg_key_str1(c, (key)) -+ -+/* General messages */ -+#define dbg_gen(fmt, ...) dbg_do_msg(UBIFS_MSG_GEN, fmt, ##__VA_ARGS__) -+ -+/* Additional journal messages */ -+#define dbg_jnl(fmt, ...) dbg_do_msg(UBIFS_MSG_JNL, fmt, ##__VA_ARGS__) -+ -+/* Additional TNC messages */ -+#define dbg_tnc(fmt, ...) dbg_do_msg(UBIFS_MSG_TNC, fmt, ##__VA_ARGS__) -+ -+/* Additional lprops messages */ -+#define dbg_lp(fmt, ...) 
dbg_do_msg(UBIFS_MSG_LP, fmt, ##__VA_ARGS__) -+ -+/* Additional LEB find messages */ -+#define dbg_find(fmt, ...) dbg_do_msg(UBIFS_MSG_FIND, fmt, ##__VA_ARGS__) -+ -+/* Additional mount messages */ -+#define dbg_mnt(fmt, ...) dbg_do_msg(UBIFS_MSG_MNT, fmt, ##__VA_ARGS__) -+ -+/* Additional I/O messages */ -+#define dbg_io(fmt, ...) dbg_do_msg(UBIFS_MSG_IO, fmt, ##__VA_ARGS__) -+ -+/* Additional commit messages */ -+#define dbg_cmt(fmt, ...) dbg_do_msg(UBIFS_MSG_CMT, fmt, ##__VA_ARGS__) -+ -+/* Additional budgeting messages */ -+#define dbg_budg(fmt, ...) dbg_do_msg(UBIFS_MSG_BUDG, fmt, ##__VA_ARGS__) -+ -+/* Additional log messages */ -+#define dbg_log(fmt, ...) dbg_do_msg(UBIFS_MSG_LOG, fmt, ##__VA_ARGS__) -+ -+/* Additional gc messages */ -+#define dbg_gc(fmt, ...) dbg_do_msg(UBIFS_MSG_GC, fmt, ##__VA_ARGS__) -+ -+/* Additional scan messages */ -+#define dbg_scan(fmt, ...) dbg_do_msg(UBIFS_MSG_SCAN, fmt, ##__VA_ARGS__) -+ -+/* Additional recovery messages */ -+#define dbg_rcvry(fmt, ...) dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__) -+ -+/* -+ * Debugging message type flags (must match msg_type_names in debug.c). 
-+ * -+ * UBIFS_MSG_GEN: general messages -+ * UBIFS_MSG_JNL: journal messages -+ * UBIFS_MSG_MNT: mount messages -+ * UBIFS_MSG_CMT: commit messages -+ * UBIFS_MSG_FIND: LEB find messages -+ * UBIFS_MSG_BUDG: budgeting messages -+ * UBIFS_MSG_GC: garbage collection messages -+ * UBIFS_MSG_TNC: TNC messages -+ * UBIFS_MSG_LP: lprops messages -+ * UBIFS_MSG_IO: I/O messages -+ * UBIFS_MSG_LOG: log messages -+ * UBIFS_MSG_SCAN: scan messages -+ * UBIFS_MSG_RCVRY: recovery messages -+ */ -+enum { -+ UBIFS_MSG_GEN = 0x1, -+ UBIFS_MSG_JNL = 0x2, -+ UBIFS_MSG_MNT = 0x4, -+ UBIFS_MSG_CMT = 0x8, -+ UBIFS_MSG_FIND = 0x10, -+ UBIFS_MSG_BUDG = 0x20, -+ UBIFS_MSG_GC = 0x40, -+ UBIFS_MSG_TNC = 0x80, -+ UBIFS_MSG_LP = 0x100, -+ UBIFS_MSG_IO = 0x200, -+ UBIFS_MSG_LOG = 0x400, -+ UBIFS_MSG_SCAN = 0x800, -+ UBIFS_MSG_RCVRY = 0x1000, -+}; -+ -+/* Debugging message type flags for each default debug message level */ -+#define UBIFS_MSG_LVL_0 0 -+#define UBIFS_MSG_LVL_1 0x1 -+#define UBIFS_MSG_LVL_2 0x7f -+#define UBIFS_MSG_LVL_3 0xffff -+ -+/* -+ * Debugging check flags (must match chk_names in debug.c). -+ * -+ * UBIFS_CHK_GEN: general checks -+ * UBIFS_CHK_TNC: check TNC -+ * UBIFS_CHK_IDX_SZ: check index size -+ * UBIFS_CHK_ORPH: check orphans -+ * UBIFS_CHK_OLD_IDX: check the old index -+ * UBIFS_CHK_LPROPS: check lprops -+ * UBIFS_CHK_FS: check the file-system -+ */ -+enum { -+ UBIFS_CHK_GEN = 0x1, -+ UBIFS_CHK_TNC = 0x2, -+ UBIFS_CHK_IDX_SZ = 0x4, -+ UBIFS_CHK_ORPH = 0x8, -+ UBIFS_CHK_OLD_IDX = 0x10, -+ UBIFS_CHK_LPROPS = 0x20, -+ UBIFS_CHK_FS = 0x40, -+}; -+ -+/* -+ * Special testing flags (must match tst_names in debug.c). 
-+ * -+ * UBIFS_TST_FORCE_IN_THE_GAPS: force the use of in-the-gaps method -+ * UBIFS_TST_RCVRY: failure mode for recovery testing -+ */ -+enum { -+ UBIFS_TST_FORCE_IN_THE_GAPS = 0x2, -+ UBIFS_TST_RCVRY = 0x4, -+}; -+ -+#if CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 1 -+#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_1 -+#elif CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 2 -+#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_2 -+#elif CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 3 -+#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_3 -+#else -+#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_0 -+#endif -+ -+#ifdef CONFIG_UBIFS_FS_DEBUG_CHKS -+#define UBIFS_CHK_FLAGS_DEFAULT 0xffffffff -+#else -+#define UBIFS_CHK_FLAGS_DEFAULT 0 -+#endif -+ -+extern spinlock_t dbg_lock; -+ -+extern unsigned int ubifs_msg_flags; -+extern unsigned int ubifs_chk_flags; -+extern unsigned int ubifs_tst_flags; -+ -+int ubifs_debugging_init(struct ubifs_info *c); -+void ubifs_debugging_exit(struct ubifs_info *c); -+ -+/* Dump functions */ -+const char *dbg_ntype(int type); -+const char *dbg_cstate(int cmt_state); -+const char *dbg_get_key_dump(const struct ubifs_info *c, -+ const union ubifs_key *key); -+void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode); -+void dbg_dump_node(const struct ubifs_info *c, const void *node); -+void dbg_dump_lpt_node(const struct ubifs_info *c, void *node, int lnum, -+ int offs); -+void dbg_dump_budget_req(const struct ubifs_budget_req *req); -+void dbg_dump_lstats(const struct ubifs_lp_stats *lst); -+void dbg_dump_budg(struct ubifs_info *c); -+void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp); -+void dbg_dump_lprops(struct ubifs_info *c); -+void dbg_dump_lpt_info(struct ubifs_info *c); -+void dbg_dump_leb(const struct ubifs_info *c, int lnum); -+void dbg_dump_znode(const struct ubifs_info *c, -+ const struct ubifs_znode *znode); -+void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat); -+void dbg_dump_pnode(struct ubifs_info *c, 
struct ubifs_pnode *pnode, -+ struct ubifs_nnode *parent, int iip); -+void dbg_dump_tnc(struct ubifs_info *c); -+void dbg_dump_index(struct ubifs_info *c); -+void dbg_dump_lpt_lebs(const struct ubifs_info *c); -+ -+/* Checking helper functions */ -+typedef int (*dbg_leaf_callback)(struct ubifs_info *c, -+ struct ubifs_zbranch *zbr, void *priv); -+typedef int (*dbg_znode_callback)(struct ubifs_info *c, -+ struct ubifs_znode *znode, void *priv); -+int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb, -+ dbg_znode_callback znode_cb, void *priv); -+ -+/* Checking functions */ -+void dbg_save_space_info(struct ubifs_info *c); -+int dbg_check_space_info(struct ubifs_info *c); -+int dbg_check_lprops(struct ubifs_info *c); -+int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot); -+int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot); -+int dbg_check_cats(struct ubifs_info *c); -+int dbg_check_ltab(struct ubifs_info *c); -+int dbg_chk_lpt_free_spc(struct ubifs_info *c); -+int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len); -+int dbg_check_synced_i_size(struct inode *inode); -+int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir); -+int dbg_check_tnc(struct ubifs_info *c, int extra); -+int dbg_check_idx_size(struct ubifs_info *c, long long idx_size); -+int dbg_check_filesystem(struct ubifs_info *c); -+void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat, -+ int add_pos); -+int dbg_check_lprops(struct ubifs_info *c); -+int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode, -+ int row, int col); -+ -+/* Force the use of in-the-gaps method for testing */ -+ -+#define dbg_force_in_the_gaps_enabled \ -+ (ubifs_tst_flags & UBIFS_TST_FORCE_IN_THE_GAPS) -+ -+int dbg_force_in_the_gaps(void); -+ -+/* Failure mode for recovery testing */ -+ -+#define dbg_failure_mode (ubifs_tst_flags & UBIFS_TST_RCVRY) -+ -+#ifndef UBIFS_DBG_PRESERVE_UBI -+ -+#define 
ubi_leb_read dbg_leb_read -+#define ubi_leb_write dbg_leb_write -+#define ubi_leb_change dbg_leb_change -+#define ubi_leb_erase dbg_leb_erase -+#define ubi_leb_unmap dbg_leb_unmap -+#define ubi_is_mapped dbg_is_mapped -+#define ubi_leb_map dbg_leb_map -+ -+#endif -+ -+int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, -+ int len, int check); -+int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf, -+ int offset, int len, int dtype); -+int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf, -+ int len, int dtype); -+int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum); -+int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum); -+int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum); -+int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype); -+ -+static inline int dbg_read(struct ubi_volume_desc *desc, int lnum, char *buf, -+ int offset, int len) -+{ -+ return dbg_leb_read(desc, lnum, buf, offset, len, 0); -+} -+ -+static inline int dbg_write(struct ubi_volume_desc *desc, int lnum, -+ const void *buf, int offset, int len) -+{ -+ return dbg_leb_write(desc, lnum, buf, offset, len, UBI_UNKNOWN); -+} -+ -+static inline int dbg_change(struct ubi_volume_desc *desc, int lnum, -+ const void *buf, int len) -+{ -+ return dbg_leb_change(desc, lnum, buf, len, UBI_UNKNOWN); -+} -+ -+/* Debugfs-related stuff */ -+int dbg_debugfs_init(void); -+void dbg_debugfs_exit(void); -+int dbg_debugfs_init_fs(struct ubifs_info *c); -+void dbg_debugfs_exit_fs(struct ubifs_info *c); -+ -+#else /* !CONFIG_UBIFS_FS_DEBUG */ -+ -+/* Use "if (0)" to make compiler check arguments even if debugging is off */ -+#define ubifs_assert(expr) do { \ -+ if (0 && (expr)) \ -+ printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \ -+ __func__, __LINE__, current->pid); \ -+} while (0) -+ -+#define dbg_err(fmt, ...) 
do { \ -+ if (0) \ -+ ubifs_err(fmt, ##__VA_ARGS__); \ -+} while (0) -+ -+#define dbg_msg(fmt, ...) do { \ -+ if (0) \ -+ printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", \ -+ current->pid, __func__, ##__VA_ARGS__); \ -+} while (0) -+ -+#define dbg_dump_stack() -+#define ubifs_assert_cmt_locked(c) -+ -+#define dbg_gen(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -+#define dbg_jnl(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -+#define dbg_tnc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -+#define dbg_lp(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -+#define dbg_find(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -+#define dbg_mnt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -+#define dbg_io(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -+#define dbg_cmt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -+#define dbg_budg(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -+#define dbg_log(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -+#define dbg_gc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -+#define dbg_scan(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -+#define dbg_rcvry(fmt, ...) 
dbg_msg(fmt, ##__VA_ARGS__) -+ -+#define DBGKEY(key) ((char *)(key)) -+#define DBGKEY1(key) ((char *)(key)) -+ -+#define ubifs_debugging_init(c) 0 -+#define ubifs_debugging_exit(c) ({}) -+ -+#define dbg_ntype(type) "" -+#define dbg_cstate(cmt_state) "" -+#define dbg_get_key_dump(c, key) ({}) -+#define dbg_dump_inode(c, inode) ({}) -+#define dbg_dump_node(c, node) ({}) -+#define dbg_dump_lpt_node(c, node, lnum, offs) ({}) -+#define dbg_dump_budget_req(req) ({}) -+#define dbg_dump_lstats(lst) ({}) -+#define dbg_dump_budg(c) ({}) -+#define dbg_dump_lprop(c, lp) ({}) -+#define dbg_dump_lprops(c) ({}) -+#define dbg_dump_lpt_info(c) ({}) -+#define dbg_dump_leb(c, lnum) ({}) -+#define dbg_dump_znode(c, znode) ({}) -+#define dbg_dump_heap(c, heap, cat) ({}) -+#define dbg_dump_pnode(c, pnode, parent, iip) ({}) -+#define dbg_dump_tnc(c) ({}) -+#define dbg_dump_index(c) ({}) -+#define dbg_dump_lpt_lebs(c) ({}) -+ -+#define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0 -+#define dbg_old_index_check_init(c, zroot) 0 -+#define dbg_save_space_info(c) ({}) -+#define dbg_check_space_info(c) 0 -+#define dbg_check_old_index(c, zroot) 0 -+#define dbg_check_cats(c) 0 -+#define dbg_check_ltab(c) 0 -+#define dbg_chk_lpt_free_spc(c) 0 -+#define dbg_chk_lpt_sz(c, action, len) 0 -+#define dbg_check_synced_i_size(inode) 0 -+#define dbg_check_dir_size(c, dir) 0 -+#define dbg_check_tnc(c, x) 0 -+#define dbg_check_idx_size(c, idx_size) 0 -+#define dbg_check_filesystem(c) 0 -+#define dbg_check_heap(c, heap, cat, add_pos) ({}) -+#define dbg_check_lprops(c) 0 -+#define dbg_check_lpt_nodes(c, cnode, row, col) 0 -+#define dbg_force_in_the_gaps_enabled 0 -+#define dbg_force_in_the_gaps() 0 -+#define dbg_failure_mode 0 -+ -+#define dbg_debugfs_init() 0 -+#define dbg_debugfs_exit() -+#define dbg_debugfs_init_fs(c) 0 -+#define dbg_debugfs_exit_fs(c) 0 -+ -+#endif /* !CONFIG_UBIFS_FS_DEBUG */ -+ -+/* -+ * Some compatibility stuff goes here. 
-+ */ -+ -+#include <asm/div64.h> -+ -+static inline uint64_t div_u64(uint64_t dividend, uint64_t divisor) -+{ -+ do_div(dividend, divisor); -+ return dividend; -+} -+ -+#endif /* !__UBIFS_DEBUG_H__ */ -diff -Nurd linux-2.6.24.orig/fs/ubifs/dir.c linux-2.6.24/fs/ubifs/dir.c ---- linux-2.6.24.orig/fs/ubifs/dir.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.24/fs/ubifs/dir.c 2009-04-17 09:49:28.000000000 +0200 -@@ -0,0 +1,1215 @@ -+/* * This file is part of UBIFS. -+ * -+ * Copyright (C) 2006-2008 Nokia Corporation. -+ * Copyright (C) 2006, 2007 University of Szeged, Hungary -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 as published by -+ * the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -+ * more details. -+ * -+ * You should have received a copy of the GNU General Public License along with -+ * this program; if not, write to the Free Software Foundation, Inc., 51 -+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ * -+ * Authors: Artem Bityutskiy (Битюцкий Артём) -+ * Adrian Hunter -+ * Zoltan Sogor -+ */ -+ -+/* -+ * This file implements directory operations. -+ * -+ * All FS operations in this file allocate budget before writing anything to the -+ * media. If they fail to allocate it, the error is returned. The only -+ * exceptions are 'ubifs_unlink()' and 'ubifs_rmdir()' which keep working even -+ * if they unable to allocate the budget, because deletion %-ENOSPC failure is -+ * not what users are usually ready to get. UBIFS budgeting subsystem has some -+ * space reserved for these purposes. -+ * -+ * All operations in this file write all inodes which they change straight -+ * away, instead of marking them dirty. 
For example, 'ubifs_link()' changes -+ * @i_size of the parent inode and writes the parent inode together with the -+ * target inode. This was done to simplify file-system recovery which would -+ * otherwise be very difficult to do. The only exception is rename which marks -+ * the re-named inode dirty (because its @i_ctime is updated) but does not -+ * write it, but just marks it as dirty. -+ */ -+ -+#include "ubifs.h" -+ -+/** -+ * inherit_flags - inherit flags of the parent inode. -+ * @dir: parent inode -+ * @mode: new inode mode flags -+ * -+ * This is a helper function for 'ubifs_new_inode()' which inherits flag of the -+ * parent directory inode @dir. UBIFS inodes inherit the following flags: -+ * o %UBIFS_COMPR_FL, which is useful to switch compression on/of on -+ * sub-directory basis; -+ * o %UBIFS_SYNC_FL - useful for the same reasons; -+ * o %UBIFS_DIRSYNC_FL - similar, but relevant only to directories. -+ * -+ * This function returns the inherited flags. -+ */ -+static int inherit_flags(const struct inode *dir, int mode) -+{ -+ int flags; -+ const struct ubifs_inode *ui = ubifs_inode(dir); -+ -+ if (!S_ISDIR(dir->i_mode)) -+ /* -+ * The parent is not a directory, which means that an extended -+ * attribute inode is being created. No flags. -+ */ -+ return 0; -+ -+ flags = ui->flags & (UBIFS_COMPR_FL | UBIFS_SYNC_FL | UBIFS_DIRSYNC_FL); -+ if (!S_ISDIR(mode)) -+ /* The "DIRSYNC" flag only applies to directories */ -+ flags &= ~UBIFS_DIRSYNC_FL; -+ return flags; -+} -+ -+/** -+ * ubifs_new_inode - allocate new UBIFS inode object. -+ * @c: UBIFS file-system description object -+ * @dir: parent directory inode -+ * @mode: inode mode flags -+ * -+ * This function finds an unused inode number, allocates new inode and -+ * initializes it. Returns new inode in case of success and an error code in -+ * case of failure. 
-+ */ -+struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir, -+ int mode) -+{ -+ struct inode *inode; -+ struct ubifs_inode *ui; -+ -+ inode = new_inode(c->vfs_sb); -+ ui = ubifs_inode(inode); -+ if (!inode) -+ return ERR_PTR(-ENOMEM); -+ -+ /* -+ * Set 'S_NOCMTIME' to prevent VFS form updating [mc]time of inodes and -+ * marking them dirty in file write path (see 'file_update_time()'). -+ * UBIFS has to fully control "clean <-> dirty" transitions of inodes -+ * to make budgeting work. -+ */ -+ inode->i_flags |= (S_NOCMTIME); -+ -+ inode->i_uid = current->fsuid; -+ if (dir->i_mode & S_ISGID) { -+ inode->i_gid = dir->i_gid; -+ if (S_ISDIR(mode)) -+ mode |= S_ISGID; -+ } else -+ inode->i_gid = current->fsgid; -+ inode->i_mode = mode; -+ inode->i_mtime = inode->i_atime = inode->i_ctime = -+ ubifs_current_time(inode); -+ inode->i_mapping->nrpages = 0; -+ /* Disable readahead */ -+ inode->i_mapping->backing_dev_info = &c->bdi; -+ -+ switch (mode & S_IFMT) { -+ case S_IFREG: -+ inode->i_mapping->a_ops = &ubifs_file_address_operations; -+ inode->i_op = &ubifs_file_inode_operations; -+ inode->i_fop = &ubifs_file_operations; -+ break; -+ case S_IFDIR: -+ inode->i_op = &ubifs_dir_inode_operations; -+ inode->i_fop = &ubifs_dir_operations; -+ inode->i_size = ui->ui_size = UBIFS_INO_NODE_SZ; -+ break; -+ case S_IFLNK: -+ inode->i_op = &ubifs_symlink_inode_operations; -+ break; -+ case S_IFSOCK: -+ case S_IFIFO: -+ case S_IFBLK: -+ case S_IFCHR: -+ inode->i_op = &ubifs_file_inode_operations; -+ break; -+ default: -+ BUG(); -+ } -+ -+ ui->flags = inherit_flags(dir, mode); -+ ubifs_set_inode_flags(inode); -+ if (S_ISREG(mode)) -+ ui->compr_type = c->default_compr; -+ else -+ ui->compr_type = UBIFS_COMPR_NONE; -+ ui->synced_i_size = 0; -+ -+ spin_lock(&c->cnt_lock); -+ /* Inode number overflow is currently not supported */ -+ if (c->highest_inum >= INUM_WARN_WATERMARK) { -+ if (c->highest_inum >= INUM_WATERMARK) { -+ spin_unlock(&c->cnt_lock); -+ 
ubifs_err("out of inode numbers"); -+ make_bad_inode(inode); -+ iput(inode); -+ return ERR_PTR(-EINVAL); -+ } -+ ubifs_warn("running out of inode numbers (current %lu, max %d)", -+ (unsigned long)c->highest_inum, INUM_WATERMARK); -+ } -+ -+ inode->i_ino = ++c->highest_inum; -+ /* -+ * The creation sequence number remains with this inode for its -+ * lifetime. All nodes for this inode have a greater sequence number, -+ * and so it is possible to distinguish obsolete nodes belonging to a -+ * previous incarnation of the same inode number - for example, for the -+ * purpose of rebuilding the index. -+ */ -+ ui->creat_sqnum = ++c->max_sqnum; -+ spin_unlock(&c->cnt_lock); -+ return inode; -+} -+ -+#ifdef CONFIG_UBIFS_FS_DEBUG -+ -+static int dbg_check_name(struct ubifs_dent_node *dent, struct qstr *nm) -+{ -+ if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) -+ return 0; -+ if (le16_to_cpu(dent->nlen) != nm->len) -+ return -EINVAL; -+ if (memcmp(dent->name, nm->name, nm->len)) -+ return -EINVAL; -+ return 0; -+} -+ -+#else -+ -+#define dbg_check_name(dent, nm) 0 -+ -+#endif -+ -+static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, -+ struct nameidata *nd) -+{ -+ int err; -+ union ubifs_key key; -+ struct inode *inode = NULL; -+ struct ubifs_dent_node *dent; -+ struct ubifs_info *c = dir->i_sb->s_fs_info; -+ -+ dbg_gen("'%.*s' in dir ino %lu", -+ dentry->d_name.len, dentry->d_name.name, dir->i_ino); -+ -+ if (dentry->d_name.len > UBIFS_MAX_NLEN) -+ return ERR_PTR(-ENAMETOOLONG); -+ -+ dent = kmalloc(UBIFS_MAX_DENT_NODE_SZ, GFP_NOFS); -+ if (!dent) -+ return ERR_PTR(-ENOMEM); -+ -+ dent_key_init(c, &key, dir->i_ino, &dentry->d_name); -+ -+ err = ubifs_tnc_lookup_nm(c, &key, dent, &dentry->d_name); -+ if (err) { -+ /* -+ * Do not hash the direntry if parent 'i_nlink' is zero, because -+ * this has side-effects - '->delete_inode()' call will not be -+ * called for the parent orphan inode, because 'd_count' of its -+ * direntry will stay 1 (it'll be negative 
direntry I guess) -+ * and prevent 'iput_final()' until the dentry is destroyed due -+ * to unmount or memory pressure. -+ */ -+ if (err == -ENOENT && dir->i_nlink != 0) { -+ dbg_gen("not found"); -+ goto done; -+ } -+ goto out; -+ } -+ -+ if (dbg_check_name(dent, &dentry->d_name)) { -+ err = -EINVAL; -+ goto out; -+ } -+ -+ inode = ubifs_iget(dir->i_sb, le64_to_cpu(dent->inum)); -+ if (IS_ERR(inode)) { -+ /* -+ * This should not happen. Probably the file-system needs -+ * checking. -+ */ -+ err = PTR_ERR(inode); -+ ubifs_err("dead directory entry '%.*s', error %d", -+ dentry->d_name.len, dentry->d_name.name, err); -+ ubifs_ro_mode(c, err); -+ goto out; -+ } -+ -+done: -+ kfree(dent); -+ return d_splice_alias(inode, dentry); -+ -+out: -+ kfree(dent); -+ return ERR_PTR(err); -+} -+ -+static int ubifs_create(struct inode *dir, struct dentry *dentry, int mode, -+ struct nameidata *nd) -+{ -+ struct inode *inode; -+ struct ubifs_info *c = dir->i_sb->s_fs_info; -+ int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len); -+ struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, -+ .dirtied_ino = 1 }; -+ struct ubifs_inode *dir_ui = ubifs_inode(dir); -+ -+ /* -+ * Budget request settings: new inode, new direntry, changing the -+ * parent directory inode. 
-+ */ -+ -+ dbg_gen("dent '%.*s', mode %#x in dir ino %lu", -+ dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino); -+ -+ err = ubifs_budget_space(c, &req); -+ if (err) -+ return err; -+ -+ inode = ubifs_new_inode(c, dir, mode); -+ if (IS_ERR(inode)) { -+ err = PTR_ERR(inode); -+ goto out_budg; -+ } -+ -+ mutex_lock(&dir_ui->ui_mutex); -+ dir->i_size += sz_change; -+ dir_ui->ui_size = dir->i_size; -+ dir->i_mtime = dir->i_ctime = inode->i_ctime; -+ err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0); -+ if (err) -+ goto out_cancel; -+ mutex_unlock(&dir_ui->ui_mutex); -+ -+ ubifs_release_budget(c, &req); -+ insert_inode_hash(inode); -+ d_instantiate(dentry, inode); -+ return 0; -+ -+out_cancel: -+ dir->i_size -= sz_change; -+ dir_ui->ui_size = dir->i_size; -+ mutex_unlock(&dir_ui->ui_mutex); -+ make_bad_inode(inode); -+ iput(inode); -+out_budg: -+ ubifs_release_budget(c, &req); -+ ubifs_err("cannot create regular file, error %d", err); -+ return err; -+} -+ -+/** -+ * vfs_dent_type - get VFS directory entry type. -+ * @type: UBIFS directory entry type -+ * -+ * This function converts UBIFS directory entry type into VFS directory entry -+ * type. -+ */ -+static unsigned int vfs_dent_type(uint8_t type) -+{ -+ switch (type) { -+ case UBIFS_ITYPE_REG: -+ return DT_REG; -+ case UBIFS_ITYPE_DIR: -+ return DT_DIR; -+ case UBIFS_ITYPE_LNK: -+ return DT_LNK; -+ case UBIFS_ITYPE_BLK: -+ return DT_BLK; -+ case UBIFS_ITYPE_CHR: -+ return DT_CHR; -+ case UBIFS_ITYPE_FIFO: -+ return DT_FIFO; -+ case UBIFS_ITYPE_SOCK: -+ return DT_SOCK; -+ default: -+ BUG(); -+ } -+ return 0; -+} -+ -+/* -+ * The classical Unix view for directory is that it is a linear array of -+ * (name, inode number) entries. Linux/VFS assumes this model as well. -+ * Particularly, 'readdir()' call wants us to return a directory entry offset -+ * which later may be used to continue 'readdir()'ing the directory or to -+ * 'seek()' to that specific direntry. 
Obviously UBIFS does not really fit this -+ * model because directory entries are identified by keys, which may collide. -+ * -+ * UBIFS uses directory entry hash value for directory offsets, so -+ * 'seekdir()'/'telldir()' may not always work because of possible key -+ * collisions. But UBIFS guarantees that consecutive 'readdir()' calls work -+ * properly by means of saving full directory entry name in the private field -+ * of the file description object. -+ * -+ * This means that UBIFS cannot support NFS which requires full -+ * 'seekdir()'/'telldir()' support. -+ */ -+static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir) -+{ -+ int err, over = 0; -+ struct qstr nm; -+ union ubifs_key key; -+ struct ubifs_dent_node *dent; -+ struct inode *dir = file->f_path.dentry->d_inode; -+ struct ubifs_info *c = dir->i_sb->s_fs_info; -+ -+ dbg_gen("dir ino %lu, f_pos %#llx", dir->i_ino, file->f_pos); -+ -+ if (file->f_pos > UBIFS_S_KEY_HASH_MASK || file->f_pos == 2) -+ /* -+ * The directory was seek'ed to a senseless position or there -+ * are no more entries. -+ */ -+ return 0; -+ -+ /* File positions 0 and 1 correspond to "." and ".." */ -+ if (file->f_pos == 0) { -+ ubifs_assert(!file->private_data); -+ over = filldir(dirent, ".", 1, 0, dir->i_ino, DT_DIR); -+ if (over) -+ return 0; -+ file->f_pos = 1; -+ } -+ -+ if (file->f_pos == 1) { -+ ubifs_assert(!file->private_data); -+ over = filldir(dirent, "..", 2, 1, -+ parent_ino(file->f_path.dentry), DT_DIR); -+ if (over) -+ return 0; -+ -+ /* Find the first entry in TNC and save it */ -+ lowest_dent_key(c, &key, dir->i_ino); -+ nm.name = NULL; -+ dent = ubifs_tnc_next_ent(c, &key, &nm); -+ if (IS_ERR(dent)) { -+ err = PTR_ERR(dent); -+ goto out; -+ } -+ -+ file->f_pos = key_hash_flash(c, &dent->key); -+ file->private_data = dent; -+ } -+ -+ dent = file->private_data; -+ if (!dent) { -+ /* -+ * The directory was seek'ed to and is now readdir'ed. 
-+ * Find the entry corresponding to @file->f_pos or the -+ * closest one. -+ */ -+ dent_key_init_hash(c, &key, dir->i_ino, file->f_pos); -+ nm.name = NULL; -+ dent = ubifs_tnc_next_ent(c, &key, &nm); -+ if (IS_ERR(dent)) { -+ err = PTR_ERR(dent); -+ goto out; -+ } -+ file->f_pos = key_hash_flash(c, &dent->key); -+ file->private_data = dent; -+ } -+ -+ while (1) { -+ dbg_gen("feed '%s', ino %llu, new f_pos %#x", -+ dent->name, (unsigned long long)le64_to_cpu(dent->inum), -+ key_hash_flash(c, &dent->key)); -+ ubifs_assert(le64_to_cpu(dent->ch.sqnum) > -+ ubifs_inode(dir)->creat_sqnum); -+ -+ nm.len = le16_to_cpu(dent->nlen); -+ over = filldir(dirent, dent->name, nm.len, file->f_pos, -+ le64_to_cpu(dent->inum), -+ vfs_dent_type(dent->type)); -+ if (over) -+ return 0; -+ -+ /* Switch to the next entry */ -+ key_read(c, &dent->key, &key); -+ nm.name = dent->name; -+ dent = ubifs_tnc_next_ent(c, &key, &nm); -+ if (IS_ERR(dent)) { -+ err = PTR_ERR(dent); -+ goto out; -+ } -+ -+ kfree(file->private_data); -+ file->f_pos = key_hash_flash(c, &dent->key); -+ file->private_data = dent; -+ cond_resched(); -+ } -+ -+out: -+ if (err != -ENOENT) { -+ ubifs_err("cannot find next direntry, error %d", err); -+ return err; -+ } -+ -+ kfree(file->private_data); -+ file->private_data = NULL; -+ file->f_pos = 2; -+ return 0; -+} -+ -+/* If a directory is seeked, we have to free saved readdir() state */ -+static loff_t ubifs_dir_llseek(struct file *file, loff_t offset, int origin) -+{ -+ kfree(file->private_data); -+ file->private_data = NULL; -+ return generic_file_llseek(file, offset, origin); -+} -+ -+/* Free saved readdir() state when the directory is closed */ -+static int ubifs_dir_release(struct inode *dir, struct file *file) -+{ -+ kfree(file->private_data); -+ file->private_data = NULL; -+ return 0; -+} -+ -+/** -+ * lock_2_inodes - a wrapper for locking two UBIFS inodes. 
-+ * @inode1: first inode -+ * @inode2: second inode -+ * -+ * We do not implement any tricks to guarantee strict lock ordering, because -+ * VFS has already done it for us on the @i_mutex. So this is just a simple -+ * wrapper function. -+ */ -+static void lock_2_inodes(struct inode *inode1, struct inode *inode2) -+{ -+ mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_1); -+ mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_2); -+} -+ -+/** -+ * unlock_2_inodes - a wrapper for unlocking two UBIFS inodes. -+ * @inode1: first inode -+ * @inode2: second inode -+ */ -+static void unlock_2_inodes(struct inode *inode1, struct inode *inode2) -+{ -+ mutex_unlock(&ubifs_inode(inode2)->ui_mutex); -+ mutex_unlock(&ubifs_inode(inode1)->ui_mutex); -+} -+ -+static int ubifs_link(struct dentry *old_dentry, struct inode *dir, -+ struct dentry *dentry) -+{ -+ struct ubifs_info *c = dir->i_sb->s_fs_info; -+ struct inode *inode = old_dentry->d_inode; -+ struct ubifs_inode *ui = ubifs_inode(inode); -+ struct ubifs_inode *dir_ui = ubifs_inode(dir); -+ int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len); -+ struct ubifs_budget_req req = { .new_dent = 1, .dirtied_ino = 2, -+ .dirtied_ino_d = ALIGN(ui->data_len, 8) }; -+ -+ /* -+ * Budget request settings: new direntry, changing the target inode, -+ * changing the parent inode. 
-+ */ -+ -+ dbg_gen("dent '%.*s' to ino %lu (nlink %d) in dir ino %lu", -+ dentry->d_name.len, dentry->d_name.name, inode->i_ino, -+ inode->i_nlink, dir->i_ino); -+ ubifs_assert(mutex_is_locked(&dir->i_mutex)); -+ ubifs_assert(mutex_is_locked(&inode->i_mutex)); -+ err = dbg_check_synced_i_size(inode); -+ if (err) -+ return err; -+ -+ err = ubifs_budget_space(c, &req); -+ if (err) -+ return err; -+ -+ lock_2_inodes(dir, inode); -+ inc_nlink(inode); -+ atomic_inc(&inode->i_count); -+ inode->i_ctime = ubifs_current_time(inode); -+ dir->i_size += sz_change; -+ dir_ui->ui_size = dir->i_size; -+ dir->i_mtime = dir->i_ctime = inode->i_ctime; -+ err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0); -+ if (err) -+ goto out_cancel; -+ unlock_2_inodes(dir, inode); -+ -+ ubifs_release_budget(c, &req); -+ d_instantiate(dentry, inode); -+ return 0; -+ -+out_cancel: -+ dir->i_size -= sz_change; -+ dir_ui->ui_size = dir->i_size; -+ drop_nlink(inode); -+ unlock_2_inodes(dir, inode); -+ ubifs_release_budget(c, &req); -+ iput(inode); -+ return err; -+} -+ -+static int ubifs_unlink(struct inode *dir, struct dentry *dentry) -+{ -+ struct ubifs_info *c = dir->i_sb->s_fs_info; -+ struct inode *inode = dentry->d_inode; -+ struct ubifs_inode *dir_ui = ubifs_inode(dir); -+ int sz_change = CALC_DENT_SIZE(dentry->d_name.len); -+ int err, budgeted = 1; -+ struct ubifs_budget_req req = { .mod_dent = 1, .dirtied_ino = 2 }; -+ -+ /* -+ * Budget request settings: deletion direntry, deletion inode (+1 for -+ * @dirtied_ino), changing the parent directory inode. If budgeting -+ * fails, go ahead anyway because we have extra space reserved for -+ * deletions. 
-+ */ -+ -+ dbg_gen("dent '%.*s' from ino %lu (nlink %d) in dir ino %lu", -+ dentry->d_name.len, dentry->d_name.name, inode->i_ino, -+ inode->i_nlink, dir->i_ino); -+ ubifs_assert(mutex_is_locked(&dir->i_mutex)); -+ ubifs_assert(mutex_is_locked(&inode->i_mutex)); -+ err = dbg_check_synced_i_size(inode); -+ if (err) -+ return err; -+ -+ err = ubifs_budget_space(c, &req); -+ if (err) { -+ if (err != -ENOSPC) -+ return err; -+ budgeted = 0; -+ } -+ -+ lock_2_inodes(dir, inode); -+ inode->i_ctime = ubifs_current_time(dir); -+ drop_nlink(inode); -+ dir->i_size -= sz_change; -+ dir_ui->ui_size = dir->i_size; -+ dir->i_mtime = dir->i_ctime = inode->i_ctime; -+ err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 1, 0); -+ if (err) -+ goto out_cancel; -+ unlock_2_inodes(dir, inode); -+ -+ if (budgeted) -+ ubifs_release_budget(c, &req); -+ else { -+ /* We've deleted something - clean the "no space" flags */ -+ c->nospace = c->nospace_rp = 0; -+ smp_wmb(); -+ } -+ return 0; -+ -+out_cancel: -+ dir->i_size += sz_change; -+ dir_ui->ui_size = dir->i_size; -+ inc_nlink(inode); -+ unlock_2_inodes(dir, inode); -+ if (budgeted) -+ ubifs_release_budget(c, &req); -+ return err; -+} -+ -+/** -+ * check_dir_empty - check if a directory is empty or not. -+ * @c: UBIFS file-system description object -+ * @dir: VFS inode object of the directory to check -+ * -+ * This function checks if directory @dir is empty. Returns zero if the -+ * directory is empty, %-ENOTEMPTY if it is not, and other negative error codes -+ * in case of of errors. 
-+ */ -+static int check_dir_empty(struct ubifs_info *c, struct inode *dir) -+{ -+ struct qstr nm = { .name = NULL }; -+ struct ubifs_dent_node *dent; -+ union ubifs_key key; -+ int err; -+ -+ lowest_dent_key(c, &key, dir->i_ino); -+ dent = ubifs_tnc_next_ent(c, &key, &nm); -+ if (IS_ERR(dent)) { -+ err = PTR_ERR(dent); -+ if (err == -ENOENT) -+ err = 0; -+ } else { -+ kfree(dent); -+ err = -ENOTEMPTY; -+ } -+ return err; -+} -+ -+static int ubifs_rmdir(struct inode *dir, struct dentry *dentry) -+{ -+ struct ubifs_info *c = dir->i_sb->s_fs_info; -+ struct inode *inode = dentry->d_inode; -+ int sz_change = CALC_DENT_SIZE(dentry->d_name.len); -+ int err, budgeted = 1; -+ struct ubifs_inode *dir_ui = ubifs_inode(dir); -+ struct ubifs_budget_req req = { .mod_dent = 1, .dirtied_ino = 2 }; -+ -+ /* -+ * Budget request settings: deletion direntry, deletion inode and -+ * changing the parent inode. If budgeting fails, go ahead anyway -+ * because we have extra space reserved for deletions. -+ */ -+ -+ dbg_gen("directory '%.*s', ino %lu in dir ino %lu", dentry->d_name.len, -+ dentry->d_name.name, inode->i_ino, dir->i_ino); -+ ubifs_assert(mutex_is_locked(&dir->i_mutex)); -+ ubifs_assert(mutex_is_locked(&inode->i_mutex)); -+ err = check_dir_empty(c, dentry->d_inode); -+ if (err) -+ return err; -+ -+ err = ubifs_budget_space(c, &req); -+ if (err) { -+ if (err != -ENOSPC) -+ return err; -+ budgeted = 0; -+ } -+ -+ lock_2_inodes(dir, inode); -+ inode->i_ctime = ubifs_current_time(dir); -+ clear_nlink(inode); -+ drop_nlink(dir); -+ dir->i_size -= sz_change; -+ dir_ui->ui_size = dir->i_size; -+ dir->i_mtime = dir->i_ctime = inode->i_ctime; -+ err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 1, 0); -+ if (err) -+ goto out_cancel; -+ unlock_2_inodes(dir, inode); -+ -+ if (budgeted) -+ ubifs_release_budget(c, &req); -+ else { -+ /* We've deleted something - clean the "no space" flags */ -+ c->nospace = c->nospace_rp = 0; -+ smp_wmb(); -+ } -+ return 0; -+ -+out_cancel: -+ 
dir->i_size += sz_change; -+ dir_ui->ui_size = dir->i_size; -+ inc_nlink(dir); -+ inc_nlink(inode); -+ inc_nlink(inode); -+ unlock_2_inodes(dir, inode); -+ if (budgeted) -+ ubifs_release_budget(c, &req); -+ return err; -+} -+ -+static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, int mode) -+{ -+ struct inode *inode; -+ struct ubifs_inode *dir_ui = ubifs_inode(dir); -+ struct ubifs_info *c = dir->i_sb->s_fs_info; -+ int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len); -+ struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1 }; -+ -+ /* -+ * Budget request settings: new inode, new direntry and changing parent -+ * directory inode. -+ */ -+ -+ dbg_gen("dent '%.*s', mode %#x in dir ino %lu", -+ dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino); -+ -+ err = ubifs_budget_space(c, &req); -+ if (err) -+ return err; -+ -+ inode = ubifs_new_inode(c, dir, S_IFDIR | mode); -+ if (IS_ERR(inode)) { -+ err = PTR_ERR(inode); -+ goto out_budg; -+ } -+ -+ mutex_lock(&dir_ui->ui_mutex); -+ insert_inode_hash(inode); -+ inc_nlink(inode); -+ inc_nlink(dir); -+ dir->i_size += sz_change; -+ dir_ui->ui_size = dir->i_size; -+ dir->i_mtime = dir->i_ctime = inode->i_ctime; -+ err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0); -+ if (err) { -+ ubifs_err("cannot create directory, error %d", err); -+ goto out_cancel; -+ } -+ mutex_unlock(&dir_ui->ui_mutex); -+ -+ ubifs_release_budget(c, &req); -+ d_instantiate(dentry, inode); -+ return 0; -+ -+out_cancel: -+ dir->i_size -= sz_change; -+ dir_ui->ui_size = dir->i_size; -+ drop_nlink(dir); -+ mutex_unlock(&dir_ui->ui_mutex); -+ make_bad_inode(inode); -+ iput(inode); -+out_budg: -+ ubifs_release_budget(c, &req); -+ return err; -+} -+ -+static int ubifs_mknod(struct inode *dir, struct dentry *dentry, -+ int mode, dev_t rdev) -+{ -+ struct inode *inode; -+ struct ubifs_inode *ui; -+ struct ubifs_inode *dir_ui = ubifs_inode(dir); -+ struct ubifs_info *c = dir->i_sb->s_fs_info; -+ union ubifs_dev_desc *dev = 
NULL; -+ int sz_change = CALC_DENT_SIZE(dentry->d_name.len); -+ int err, devlen = 0; -+ struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, -+ .new_ino_d = ALIGN(devlen, 8), -+ .dirtied_ino = 1 }; -+ -+ /* -+ * Budget request settings: new inode, new direntry and changing parent -+ * directory inode. -+ */ -+ -+ dbg_gen("dent '%.*s' in dir ino %lu", -+ dentry->d_name.len, dentry->d_name.name, dir->i_ino); -+ -+ if (!new_valid_dev(rdev)) -+ return -EINVAL; -+ -+ if (S_ISBLK(mode) || S_ISCHR(mode)) { -+ dev = kmalloc(sizeof(union ubifs_dev_desc), GFP_NOFS); -+ if (!dev) -+ return -ENOMEM; -+ devlen = ubifs_encode_dev(dev, rdev); -+ } -+ -+ err = ubifs_budget_space(c, &req); -+ if (err) { -+ kfree(dev); -+ return err; -+ } -+ -+ inode = ubifs_new_inode(c, dir, mode); -+ if (IS_ERR(inode)) { -+ kfree(dev); -+ err = PTR_ERR(inode); -+ goto out_budg; -+ } -+ -+ init_special_inode(inode, inode->i_mode, rdev); -+ inode->i_size = ubifs_inode(inode)->ui_size = devlen; -+ ui = ubifs_inode(inode); -+ ui->data = dev; -+ ui->data_len = devlen; -+ -+ mutex_lock(&dir_ui->ui_mutex); -+ dir->i_size += sz_change; -+ dir_ui->ui_size = dir->i_size; -+ dir->i_mtime = dir->i_ctime = inode->i_ctime; -+ err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0); -+ if (err) -+ goto out_cancel; -+ mutex_unlock(&dir_ui->ui_mutex); -+ -+ ubifs_release_budget(c, &req); -+ insert_inode_hash(inode); -+ d_instantiate(dentry, inode); -+ return 0; -+ -+out_cancel: -+ dir->i_size -= sz_change; -+ dir_ui->ui_size = dir->i_size; -+ mutex_unlock(&dir_ui->ui_mutex); -+ make_bad_inode(inode); -+ iput(inode); -+out_budg: -+ ubifs_release_budget(c, &req); -+ return err; -+} -+ -+static int ubifs_symlink(struct inode *dir, struct dentry *dentry, -+ const char *symname) -+{ -+ struct inode *inode; -+ struct ubifs_inode *ui; -+ struct ubifs_inode *dir_ui = ubifs_inode(dir); -+ struct ubifs_info *c = dir->i_sb->s_fs_info; -+ int err, len = strlen(symname); -+ int sz_change = 
CALC_DENT_SIZE(dentry->d_name.len); -+ struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, -+ .new_ino_d = ALIGN(len, 8), -+ .dirtied_ino = 1 }; -+ -+ /* -+ * Budget request settings: new inode, new direntry and changing parent -+ * directory inode. -+ */ -+ -+ dbg_gen("dent '%.*s', target '%s' in dir ino %lu", dentry->d_name.len, -+ dentry->d_name.name, symname, dir->i_ino); -+ -+ if (len > UBIFS_MAX_INO_DATA) -+ return -ENAMETOOLONG; -+ -+ err = ubifs_budget_space(c, &req); -+ if (err) -+ return err; -+ -+ inode = ubifs_new_inode(c, dir, S_IFLNK | S_IRWXUGO); -+ if (IS_ERR(inode)) { -+ err = PTR_ERR(inode); -+ goto out_budg; -+ } -+ -+ ui = ubifs_inode(inode); -+ ui->data = kmalloc(len + 1, GFP_NOFS); -+ if (!ui->data) { -+ err = -ENOMEM; -+ goto out_inode; -+ } -+ -+ memcpy(ui->data, symname, len); -+ ((char *)ui->data)[len] = '\0'; -+ /* -+ * The terminating zero byte is not written to the flash media and it -+ * is put just to make later in-memory string processing simpler. Thus, -+ * data length is @len, not @len + %1. -+ */ -+ ui->data_len = len; -+ inode->i_size = ubifs_inode(inode)->ui_size = len; -+ -+ mutex_lock(&dir_ui->ui_mutex); -+ dir->i_size += sz_change; -+ dir_ui->ui_size = dir->i_size; -+ dir->i_mtime = dir->i_ctime = inode->i_ctime; -+ err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0); -+ if (err) -+ goto out_cancel; -+ mutex_unlock(&dir_ui->ui_mutex); -+ -+ ubifs_release_budget(c, &req); -+ insert_inode_hash(inode); -+ d_instantiate(dentry, inode); -+ return 0; -+ -+out_cancel: -+ dir->i_size -= sz_change; -+ dir_ui->ui_size = dir->i_size; -+ mutex_unlock(&dir_ui->ui_mutex); -+out_inode: -+ make_bad_inode(inode); -+ iput(inode); -+out_budg: -+ ubifs_release_budget(c, &req); -+ return err; -+} -+ -+/** -+ * lock_3_inodes - a wrapper for locking three UBIFS inodes. 
-+ * @inode1: first inode -+ * @inode2: second inode -+ * @inode3: third inode -+ * -+ * This function is used for 'ubifs_rename()' and @inode1 may be the same as -+ * @inode2 whereas @inode3 may be %NULL. -+ * -+ * We do not implement any tricks to guarantee strict lock ordering, because -+ * VFS has already done it for us on the @i_mutex. So this is just a simple -+ * wrapper function. -+ */ -+static void lock_3_inodes(struct inode *inode1, struct inode *inode2, -+ struct inode *inode3) -+{ -+ mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_1); -+ if (inode2 != inode1) -+ mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_2); -+ if (inode3) -+ mutex_lock_nested(&ubifs_inode(inode3)->ui_mutex, WB_MUTEX_3); -+} -+ -+/** -+ * unlock_3_inodes - a wrapper for unlocking three UBIFS inodes for rename. -+ * @inode1: first inode -+ * @inode2: second inode -+ * @inode3: third inode -+ */ -+static void unlock_3_inodes(struct inode *inode1, struct inode *inode2, -+ struct inode *inode3) -+{ -+ if (inode3) -+ mutex_unlock(&ubifs_inode(inode3)->ui_mutex); -+ if (inode1 != inode2) -+ mutex_unlock(&ubifs_inode(inode2)->ui_mutex); -+ mutex_unlock(&ubifs_inode(inode1)->ui_mutex); -+} -+ -+static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, -+ struct inode *new_dir, struct dentry *new_dentry) -+{ -+ struct ubifs_info *c = old_dir->i_sb->s_fs_info; -+ struct inode *old_inode = old_dentry->d_inode; -+ struct inode *new_inode = new_dentry->d_inode; -+ struct ubifs_inode *old_inode_ui = ubifs_inode(old_inode); -+ int err, release, sync = 0, move = (new_dir != old_dir); -+ int is_dir = S_ISDIR(old_inode->i_mode); -+ int unlink = !!new_inode; -+ int new_sz = CALC_DENT_SIZE(new_dentry->d_name.len); -+ int old_sz = CALC_DENT_SIZE(old_dentry->d_name.len); -+ struct ubifs_budget_req req = { .new_dent = 1, .mod_dent = 1, -+ .dirtied_ino = 3 }; -+ struct ubifs_budget_req ino_req = { .dirtied_ino = 1, -+ .dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) 
}; -+ struct timespec time; -+ -+ /* -+ * Budget request settings: deletion direntry, new direntry, removing -+ * the old inode, and changing old and new parent directory inodes. -+ * -+ * However, this operation also marks the target inode as dirty and -+ * does not write it, so we allocate budget for the target inode -+ * separately. -+ */ -+ -+ dbg_gen("dent '%.*s' ino %lu in dir ino %lu to dent '%.*s' in " -+ "dir ino %lu", old_dentry->d_name.len, old_dentry->d_name.name, -+ old_inode->i_ino, old_dir->i_ino, new_dentry->d_name.len, -+ new_dentry->d_name.name, new_dir->i_ino); -+ ubifs_assert(mutex_is_locked(&old_dir->i_mutex)); -+ ubifs_assert(mutex_is_locked(&new_dir->i_mutex)); -+ if (unlink) -+ ubifs_assert(mutex_is_locked(&new_inode->i_mutex)); -+ -+ -+ if (unlink && is_dir) { -+ err = check_dir_empty(c, new_inode); -+ if (err) -+ return err; -+ } -+ -+ err = ubifs_budget_space(c, &req); -+ if (err) -+ return err; -+ err = ubifs_budget_space(c, &ino_req); -+ if (err) { -+ ubifs_release_budget(c, &req); -+ return err; -+ } -+ -+ lock_3_inodes(old_dir, new_dir, new_inode); -+ -+ /* -+ * Like most other Unix systems, set the @i_ctime for inodes on a -+ * rename. -+ */ -+ time = ubifs_current_time(old_dir); -+ old_inode->i_ctime = time; -+ -+ /* We must adjust parent link count when renaming directories */ -+ if (is_dir) { -+ if (move) { -+ /* -+ * @old_dir loses a link because we are moving -+ * @old_inode to a different directory. -+ */ -+ drop_nlink(old_dir); -+ /* -+ * @new_dir only gains a link if we are not also -+ * overwriting an existing directory. -+ */ -+ if (!unlink) -+ inc_nlink(new_dir); -+ } else { -+ /* -+ * @old_inode is not moving to a different directory, -+ * but @old_dir still loses a link if we are -+ * overwriting an existing directory. 
-+ */ -+ if (unlink) -+ drop_nlink(old_dir); -+ } -+ } -+ -+ old_dir->i_size -= old_sz; -+ ubifs_inode(old_dir)->ui_size = old_dir->i_size; -+ old_dir->i_mtime = old_dir->i_ctime = time; -+ new_dir->i_mtime = new_dir->i_ctime = time; -+ -+ /* -+ * And finally, if we unlinked a direntry which happened to have the -+ * same name as the moved direntry, we have to decrement @i_nlink of -+ * the unlinked inode and change its ctime. -+ */ -+ if (unlink) { -+ /* -+ * Directories cannot have hard-links, so if this is a -+ * directory, decrement its @i_nlink twice because an empty -+ * directory has @i_nlink 2. -+ */ -+ if (is_dir) -+ drop_nlink(new_inode); -+ new_inode->i_ctime = time; -+ drop_nlink(new_inode); -+ } else { -+ new_dir->i_size += new_sz; -+ ubifs_inode(new_dir)->ui_size = new_dir->i_size; -+ } -+ -+ /* -+ * Do not ask 'ubifs_jnl_rename()' to flush write-buffer if @old_inode -+ * is dirty, because this will be done later on at the end of -+ * 'ubifs_rename()'. -+ */ -+ if (IS_SYNC(old_inode)) { -+ sync = IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir); -+ if (unlink && IS_SYNC(new_inode)) -+ sync = 1; -+ } -+ err = ubifs_jnl_rename(c, old_dir, old_dentry, new_dir, new_dentry, -+ sync); -+ if (err) -+ goto out_cancel; -+ -+ unlock_3_inodes(old_dir, new_dir, new_inode); -+ ubifs_release_budget(c, &req); -+ -+ mutex_lock(&old_inode_ui->ui_mutex); -+ release = old_inode_ui->dirty; -+ mark_inode_dirty_sync(old_inode); -+ mutex_unlock(&old_inode_ui->ui_mutex); -+ -+ if (release) -+ ubifs_release_budget(c, &ino_req); -+ if (IS_SYNC(old_inode)) -+ err = old_inode->i_sb->s_op->write_inode(old_inode, 1); -+ return err; -+ -+out_cancel: -+ if (unlink) { -+ if (is_dir) -+ inc_nlink(new_inode); -+ inc_nlink(new_inode); -+ } else { -+ new_dir->i_size -= new_sz; -+ ubifs_inode(new_dir)->ui_size = new_dir->i_size; -+ } -+ old_dir->i_size += old_sz; -+ ubifs_inode(old_dir)->ui_size = old_dir->i_size; -+ if (is_dir) { -+ if (move) { -+ inc_nlink(old_dir); -+ if (!unlink) -+ 
drop_nlink(new_dir); -+ } else { -+ if (unlink) -+ inc_nlink(old_dir); -+ } -+ } -+ unlock_3_inodes(old_dir, new_dir, new_inode); -+ ubifs_release_budget(c, &ino_req); -+ ubifs_release_budget(c, &req); -+ return err; -+} -+ -+int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry, -+ struct kstat *stat) -+{ -+ loff_t size; -+ struct inode *inode = dentry->d_inode; -+ struct ubifs_inode *ui = ubifs_inode(inode); -+ -+ mutex_lock(&ui->ui_mutex); -+ stat->dev = inode->i_sb->s_dev; -+ stat->ino = inode->i_ino; -+ stat->mode = inode->i_mode; -+ stat->nlink = inode->i_nlink; -+ stat->uid = inode->i_uid; -+ stat->gid = inode->i_gid; -+ stat->rdev = inode->i_rdev; -+ stat->atime = inode->i_atime; -+ stat->mtime = inode->i_mtime; -+ stat->ctime = inode->i_ctime; -+ stat->blksize = UBIFS_BLOCK_SIZE; -+ stat->size = ui->ui_size; -+ -+ /* -+ * Unfortunately, the 'stat()' system call was designed for block -+ * device based file systems, and it is not appropriate for UBIFS, -+ * because UBIFS does not have notion of "block". For example, it is -+ * difficult to tell how many block a directory takes - it actually -+ * takes less than 300 bytes, but we have to round it to block size, -+ * which introduces large mistake. This makes utilities like 'du' to -+ * report completely senseless numbers. This is the reason why UBIFS -+ * goes the same way as JFFS2 - it reports zero blocks for everything -+ * but regular files, which makes more sense than reporting completely -+ * wrong sizes. -+ */ -+ if (S_ISREG(inode->i_mode)) { -+ size = ui->xattr_size; -+ size += stat->size; -+ size = ALIGN(size, UBIFS_BLOCK_SIZE); -+ /* -+ * Note, user-space expects 512-byte blocks count irrespectively -+ * of what was reported in @stat->size. 
-+ */ -+ stat->blocks = size >> 9; -+ } else -+ stat->blocks = 0; -+ mutex_unlock(&ui->ui_mutex); -+ return 0; -+} -+ -+const struct inode_operations ubifs_dir_inode_operations = { -+ .lookup = ubifs_lookup, -+ .create = ubifs_create, -+ .link = ubifs_link, -+ .symlink = ubifs_symlink, -+ .unlink = ubifs_unlink, -+ .mkdir = ubifs_mkdir, -+ .rmdir = ubifs_rmdir, -+ .mknod = ubifs_mknod, -+ .rename = ubifs_rename, -+ .setattr = ubifs_setattr, -+ .getattr = ubifs_getattr, -+#ifdef CONFIG_UBIFS_FS_XATTR -+ .setxattr = ubifs_setxattr, -+ .getxattr = ubifs_getxattr, -+ .listxattr = ubifs_listxattr, -+ .removexattr = ubifs_removexattr, -+#endif -+}; -+ -+const struct file_operations ubifs_dir_operations = { -+ .llseek = ubifs_dir_llseek, -+ .release = ubifs_dir_release, -+ .read = generic_read_dir, -+ .readdir = ubifs_readdir, -+ .fsync = ubifs_fsync, -+ .unlocked_ioctl = ubifs_ioctl, -+#ifdef CONFIG_COMPAT -+ .compat_ioctl = ubifs_compat_ioctl, -+#endif -+}; -diff -Nurd linux-2.6.24.orig/fs/ubifs/file.c linux-2.6.24/fs/ubifs/file.c ---- linux-2.6.24.orig/fs/ubifs/file.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.24/fs/ubifs/file.c 2009-04-17 09:49:28.000000000 +0200 -@@ -0,0 +1,1583 @@ -+/* -+ * This file is part of UBIFS. -+ * -+ * Copyright (C) 2006-2008 Nokia Corporation. -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 as published by -+ * the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -+ * more details. 
-+ * -+ * You should have received a copy of the GNU General Public License along with -+ * this program; if not, write to the Free Software Foundation, Inc., 51 -+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ * -+ * Authors: Artem Bityutskiy (Битюцкий Артём) -+ * Adrian Hunter -+ */ -+ -+/* -+ * This file implements VFS file and inode operations of regular files, device -+ * nodes and symlinks as well as address space operations. -+ * -+ * UBIFS uses 2 page flags: PG_private and PG_checked. PG_private is set if the -+ * page is dirty and is used for budgeting purposes - dirty pages should not be -+ * budgeted. The PG_checked flag is set if full budgeting is required for the -+ * page e.g., when it corresponds to a file hole or it is just beyond the file -+ * size. The budgeting is done in 'ubifs_write_begin()', because it is OK to -+ * fail in this function, and the budget is released in 'ubifs_write_end()'. So -+ * the PG_private and PG_checked flags carry the information about how the page -+ * was budgeted, to make it possible to release the budget properly. -+ * -+ * A thing to keep in mind: inode's 'i_mutex' is locked in most VFS operations -+ * we implement. However, this is not true for '->writepage()', which might be -+ * called with 'i_mutex' unlocked. For example, when pdflush is performing -+ * write-back, it calls 'writepage()' with unlocked 'i_mutex', although the -+ * inode has 'I_LOCK' flag in this case. At "normal" work-paths 'i_mutex' is -+ * locked in '->writepage', e.g. in "sys_write -> alloc_pages -> direct reclaim -+ * path'. So, in '->writepage()' we are only guaranteed that the page is -+ * locked. -+ * -+ * Similarly, 'i_mutex' does not have to be locked in readpage(), e.g., -+ * readahead path does not have it locked ("sys_read -> generic_file_aio_read -+ * -> ondemand_readahead -> readpage"). In case of readahead, 'I_LOCK' flag is -+ * not set as well. However, UBIFS disables readahead. 
-+ * -+ * This, for example means that there might be 2 concurrent '->writepage()' -+ * calls for the same inode, but different inode dirty pages. -+ */ -+ -+#include "ubifs.h" -+#include <linux/mount.h> -+ -+static int read_block(struct inode *inode, void *addr, unsigned int block, -+ struct ubifs_data_node *dn) -+{ -+ struct ubifs_info *c = inode->i_sb->s_fs_info; -+ int err, len, out_len; -+ union ubifs_key key; -+ unsigned int dlen; -+ -+ data_key_init(c, &key, inode->i_ino, block); -+ err = ubifs_tnc_lookup(c, &key, dn); -+ if (err) { -+ if (err == -ENOENT) -+ /* Not found, so it must be a hole */ -+ memset(addr, 0, UBIFS_BLOCK_SIZE); -+ return err; -+ } -+ -+ ubifs_assert(le64_to_cpu(dn->ch.sqnum) > -+ ubifs_inode(inode)->creat_sqnum); -+ len = le32_to_cpu(dn->size); -+ if (len <= 0 || len > UBIFS_BLOCK_SIZE) -+ goto dump; -+ -+ dlen = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ; -+ out_len = UBIFS_BLOCK_SIZE; -+ err = ubifs_decompress(&dn->data, dlen, addr, &out_len, -+ le16_to_cpu(dn->compr_type)); -+ if (err || len != out_len) -+ goto dump; -+ -+ /* -+ * Data length can be less than a full block, even for blocks that are -+ * not the last in the file (e.g., as a result of making a hole and -+ * appending data). Ensure that the remainder is zeroed out. 
-+ */ -+ if (len < UBIFS_BLOCK_SIZE) -+ memset(addr + len, 0, UBIFS_BLOCK_SIZE - len); -+ -+ return 0; -+ -+dump: -+ ubifs_err("bad data node (block %u, inode %lu)", -+ block, inode->i_ino); -+ dbg_dump_node(c, dn); -+ return -EINVAL; -+} -+ -+static int do_readpage(struct page *page) -+{ -+ void *addr; -+ int err = 0, i; -+ unsigned int block, beyond; -+ struct ubifs_data_node *dn; -+ struct inode *inode = page->mapping->host; -+ loff_t i_size = i_size_read(inode); -+ -+ dbg_gen("ino %lu, pg %lu, i_size %lld, flags %#lx", -+ inode->i_ino, page->index, i_size, page->flags); -+ ubifs_assert(!PageChecked(page)); -+ ubifs_assert(!PagePrivate(page)); -+ -+ addr = kmap(page); -+ -+ block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT; -+ beyond = (i_size + UBIFS_BLOCK_SIZE - 1) >> UBIFS_BLOCK_SHIFT; -+ if (block >= beyond) { -+ /* Reading beyond inode */ -+ SetPageChecked(page); -+ memset(addr, 0, PAGE_CACHE_SIZE); -+ goto out; -+ } -+ -+ dn = kmalloc(UBIFS_MAX_DATA_NODE_SZ, GFP_NOFS); -+ if (!dn) { -+ err = -ENOMEM; -+ goto error; -+ } -+ -+ i = 0; -+ while (1) { -+ int ret; -+ -+ if (block >= beyond) { -+ /* Reading beyond inode */ -+ err = -ENOENT; -+ memset(addr, 0, UBIFS_BLOCK_SIZE); -+ } else { -+ ret = read_block(inode, addr, block, dn); -+ if (ret) { -+ err = ret; -+ if (err != -ENOENT) -+ break; -+ } else if (block + 1 == beyond) { -+ int dlen = le32_to_cpu(dn->size); -+ int ilen = i_size & (UBIFS_BLOCK_SIZE - 1); -+ -+ if (ilen && ilen < dlen) -+ memset(addr + ilen, 0, dlen - ilen); -+ } -+ } -+ if (++i >= UBIFS_BLOCKS_PER_PAGE) -+ break; -+ block += 1; -+ addr += UBIFS_BLOCK_SIZE; -+ } -+ if (err) { -+ if (err == -ENOENT) { -+ /* Not found, so it must be a hole */ -+ SetPageChecked(page); -+ dbg_gen("hole"); -+ goto out_free; -+ } -+ ubifs_err("cannot read page %lu of inode %lu, error %d", -+ page->index, inode->i_ino, err); -+ goto error; -+ } -+ -+out_free: -+ kfree(dn); -+out: -+ SetPageUptodate(page); -+ ClearPageError(page); -+ flush_dcache_page(page); -+ 
kunmap(page); -+ return 0; -+ -+error: -+ kfree(dn); -+ ClearPageUptodate(page); -+ SetPageError(page); -+ flush_dcache_page(page); -+ kunmap(page); -+ return err; -+} -+ -+/** -+ * release_new_page_budget - release budget of a new page. -+ * @c: UBIFS file-system description object -+ * -+ * This is a helper function which releases budget corresponding to the budget -+ * of one new page of data. -+ */ -+static void release_new_page_budget(struct ubifs_info *c) -+{ -+ struct ubifs_budget_req req = { .recalculate = 1, .new_page = 1 }; -+ -+ ubifs_release_budget(c, &req); -+} -+ -+/** -+ * release_existing_page_budget - release budget of an existing page. -+ * @c: UBIFS file-system description object -+ * -+ * This is a helper function which releases budget corresponding to the budget -+ * of changing one one page of data which already exists on the flash media. -+ */ -+static void release_existing_page_budget(struct ubifs_info *c) -+{ -+ struct ubifs_budget_req req = { .dd_growth = c->page_budget}; -+ -+ ubifs_release_budget(c, &req); -+} -+ -+static int write_begin_slow(struct address_space *mapping, -+ loff_t pos, unsigned len, struct page **pagep) -+{ -+ struct inode *inode = mapping->host; -+ struct ubifs_info *c = inode->i_sb->s_fs_info; -+ pgoff_t index = pos >> PAGE_CACHE_SHIFT; -+ struct ubifs_budget_req req = { .new_page = 1 }; -+ int uninitialized_var(err), appending = !!(pos + len > inode->i_size); -+ struct page *page; -+ -+ dbg_gen("ino %lu, pos %llu, len %u, i_size %lld", -+ inode->i_ino, pos, len, inode->i_size); -+ -+ /* -+ * At the slow path we have to budget before locking the page, because -+ * budgeting may force write-back, which would wait on locked pages and -+ * deadlock if we had the page locked. At this point we do not know -+ * anything about the page, so assume that this is a new page which is -+ * written to a hole. This corresponds to largest budget. Later the -+ * budget will be amended if this is not true. 
-+ */ -+ if (appending) -+ /* We are appending data, budget for inode change */ -+ req.dirtied_ino = 1; -+ -+ err = ubifs_budget_space(c, &req); -+ if (unlikely(err)) -+ return err; -+ -+ page = __grab_cache_page(mapping, index); -+ if (unlikely(!page)) { -+ ubifs_release_budget(c, &req); -+ return -ENOMEM; -+ } -+ -+ if (!PageUptodate(page)) { -+ if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) -+ SetPageChecked(page); -+ else { -+ err = do_readpage(page); -+ if (err) { -+ unlock_page(page); -+ page_cache_release(page); -+ return err; -+ } -+ } -+ -+ SetPageUptodate(page); -+ ClearPageError(page); -+ } -+ -+ if (PagePrivate(page)) -+ /* -+ * The page is dirty, which means it was budgeted twice: -+ * o first time the budget was allocated by the task which -+ * made the page dirty and set the PG_private flag; -+ * o and then we budgeted for it for the second time at the -+ * very beginning of this function. -+ * -+ * So what we have to do is to release the page budget we -+ * allocated. -+ */ -+ release_new_page_budget(c); -+ else if (!PageChecked(page)) -+ /* -+ * We are changing a page which already exists on the media. -+ * This means that changing the page does not make the amount -+ * of indexing information larger, and this part of the budget -+ * which we have already acquired may be released. -+ */ -+ ubifs_convert_page_budget(c); -+ -+ if (appending) { -+ struct ubifs_inode *ui = ubifs_inode(inode); -+ -+ /* -+ * 'ubifs_write_end()' is optimized from the fast-path part of -+ * 'ubifs_write_begin()' and expects the @ui_mutex to be locked -+ * if data is appended. -+ */ -+ mutex_lock(&ui->ui_mutex); -+ if (ui->dirty) -+ /* -+ * The inode is dirty already, so we may free the -+ * budget we allocated. -+ */ -+ ubifs_release_dirty_inode_budget(c, ui); -+ } -+ -+ *pagep = page; -+ return 0; -+} -+ -+/** -+ * allocate_budget - allocate budget for 'ubifs_write_begin()'. 
-+ * @c: UBIFS file-system description object -+ * @page: page to allocate budget for -+ * @ui: UBIFS inode object the page belongs to -+ * @appending: non-zero if the page is appended -+ * -+ * This is a helper function for 'ubifs_write_begin()' which allocates budget -+ * for the operation. The budget is allocated differently depending on whether -+ * this is appending, whether the page is dirty or not, and so on. This -+ * function leaves the @ui->ui_mutex locked in case of appending. Returns zero -+ * in case of success and %-ENOSPC in case of failure. -+ */ -+static int allocate_budget(struct ubifs_info *c, struct page *page, -+ struct ubifs_inode *ui, int appending) -+{ -+ struct ubifs_budget_req req = { .fast = 1 }; -+ -+ if (PagePrivate(page)) { -+ if (!appending) -+ /* -+ * The page is dirty and we are not appending, which -+ * means no budget is needed at all. -+ */ -+ return 0; -+ -+ mutex_lock(&ui->ui_mutex); -+ if (ui->dirty) -+ /* -+ * The page is dirty and we are appending, so the inode -+ * has to be marked as dirty. However, it is already -+ * dirty, so we do not need any budget. We may return, -+ * but @ui->ui_mutex hast to be left locked because we -+ * should prevent write-back from flushing the inode -+ * and freeing the budget. The lock will be released in -+ * 'ubifs_write_end()'. -+ */ -+ return 0; -+ -+ /* -+ * The page is dirty, we are appending, the inode is clean, so -+ * we need to budget the inode change. -+ */ -+ req.dirtied_ino = 1; -+ } else { -+ if (PageChecked(page)) -+ /* -+ * The page corresponds to a hole and does not -+ * exist on the media. So changing it makes -+ * make the amount of indexing information -+ * larger, and we have to budget for a new -+ * page. -+ */ -+ req.new_page = 1; -+ else -+ /* -+ * Not a hole, the change will not add any new -+ * indexing information, budget for page -+ * change. 
-+ */ -+ req.dirtied_page = 1; -+ -+ if (appending) { -+ mutex_lock(&ui->ui_mutex); -+ if (!ui->dirty) -+ /* -+ * The inode is clean but we will have to mark -+ * it as dirty because we are appending. This -+ * needs a budget. -+ */ -+ req.dirtied_ino = 1; -+ } -+ } -+ -+ return ubifs_budget_space(c, &req); -+} -+ -+/* -+ * This function is called when a page of data is going to be written. Since -+ * the page of data will not necessarily go to the flash straight away, UBIFS -+ * has to reserve space on the media for it, which is done by means of -+ * budgeting. -+ * -+ * This is the hot-path of the file-system and we are trying to optimize it as -+ * much as possible. For this reasons it is split on 2 parts - slow and fast. -+ * -+ * There many budgeting cases: -+ * o a new page is appended - we have to budget for a new page and for -+ * changing the inode; however, if the inode is already dirty, there is -+ * no need to budget for it; -+ * o an existing clean page is changed - we have budget for it; if the page -+ * does not exist on the media (a hole), we have to budget for a new -+ * page; otherwise, we may budget for changing an existing page; the -+ * difference between these cases is that changing an existing page does -+ * not introduce anything new to the FS indexing information, so it does -+ * not grow, and smaller budget is acquired in this case; -+ * o an existing dirty page is changed - no need to budget at all, because -+ * the page budget has been acquired by earlier, when the page has been -+ * marked dirty. -+ * -+ * UBIFS budgeting sub-system may force write-back if it thinks there is no -+ * space to reserve. This imposes some locking restrictions and makes it -+ * impossible to take into account the above cases, and makes it impossible to -+ * optimize budgeting. 
-+ * -+ * The solution for this is that the fast path of 'ubifs_write_begin()' assumes -+ * there is a plenty of flash space and the budget will be acquired quickly, -+ * without forcing write-back. The slow path does not make this assumption. -+ */ -+static int ubifs_write_begin(struct file *file, struct address_space *mapping, -+ loff_t pos, unsigned len, unsigned flags, -+ struct page **pagep, void **fsdata) -+{ -+ struct inode *inode = mapping->host; -+ struct ubifs_info *c = inode->i_sb->s_fs_info; -+ struct ubifs_inode *ui = ubifs_inode(inode); -+ pgoff_t index = pos >> PAGE_CACHE_SHIFT; -+ int uninitialized_var(err), appending = !!(pos + len > inode->i_size); -+ struct page *page; -+ -+ ubifs_assert(ubifs_inode(inode)->ui_size == inode->i_size); -+ -+ if (unlikely(c->ro_media)) -+ return -EROFS; -+ -+ /* Try out the fast-path part first */ -+ page = __grab_cache_page(mapping, index); -+ if (unlikely(!page)) -+ return -ENOMEM; -+ -+ if (!PageUptodate(page)) { -+ /* The page is not loaded from the flash */ -+ if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) -+ /* -+ * We change whole page so no need to load it. But we -+ * have to set the @PG_checked flag to make the further -+ * code the page is new. This might be not true, but it -+ * is better to budget more that to read the page from -+ * the media. -+ */ -+ SetPageChecked(page); -+ else { -+ err = do_readpage(page); -+ if (err) { -+ unlock_page(page); -+ page_cache_release(page); -+ return err; -+ } -+ } -+ -+ SetPageUptodate(page); -+ ClearPageError(page); -+ } -+ -+ err = allocate_budget(c, page, ui, appending); -+ if (unlikely(err)) { -+ ubifs_assert(err == -ENOSPC); -+ /* -+ * Budgeting failed which means it would have to force -+ * write-back but didn't, because we set the @fast flag in the -+ * request. Write-back cannot be done now, while we have the -+ * page locked, because it would deadlock. Unlock and free -+ * everything and fall-back to slow-path. 
-+ */ -+ if (appending) { -+ ubifs_assert(mutex_is_locked(&ui->ui_mutex)); -+ mutex_unlock(&ui->ui_mutex); -+ } -+ unlock_page(page); -+ page_cache_release(page); -+ -+ return write_begin_slow(mapping, pos, len, pagep); -+ } -+ -+ /* -+ * Whee, we aquired budgeting quickly - without involving -+ * garbage-collection, committing or forceing write-back. We return -+ * with @ui->ui_mutex locked if we are appending pages, and unlocked -+ * otherwise. This is an optimization (slightly hacky though). -+ */ -+ *pagep = page; -+ return 0; -+ -+} -+ -+/** -+ * cancel_budget - cancel budget. -+ * @c: UBIFS file-system description object -+ * @page: page to cancel budget for -+ * @ui: UBIFS inode object the page belongs to -+ * @appending: non-zero if the page is appended -+ * -+ * This is a helper function for a page write operation. It unlocks the -+ * @ui->ui_mutex in case of appending. -+ */ -+static void cancel_budget(struct ubifs_info *c, struct page *page, -+ struct ubifs_inode *ui, int appending) -+{ -+ if (appending) { -+ if (!ui->dirty) -+ ubifs_release_dirty_inode_budget(c, ui); -+ mutex_unlock(&ui->ui_mutex); -+ } -+ if (!PagePrivate(page)) { -+ if (PageChecked(page)) -+ release_new_page_budget(c); -+ else -+ release_existing_page_budget(c); -+ } -+} -+ -+static int ubifs_write_end(struct file *file, struct address_space *mapping, -+ loff_t pos, unsigned len, unsigned copied, -+ struct page *page, void *fsdata) -+{ -+ struct inode *inode = mapping->host; -+ struct ubifs_inode *ui = ubifs_inode(inode); -+ struct ubifs_info *c = inode->i_sb->s_fs_info; -+ loff_t end_pos = pos + len; -+ int appending = !!(end_pos > inode->i_size); -+ -+ dbg_gen("ino %lu, pos %llu, pg %lu, len %u, copied %d, i_size %lld", -+ inode->i_ino, pos, page->index, len, copied, inode->i_size); -+ -+ if (unlikely(copied < len && len == PAGE_CACHE_SIZE)) { -+ /* -+ * VFS copied less data to the page that it intended and -+ * declared in its '->write_begin()' call via the @len -+ * argument. 
If the page was not up-to-date, and @len was -+ * @PAGE_CACHE_SIZE, the 'ubifs_write_begin()' function did -+ * not load it from the media (for optimization reasons). This -+ * means that part of the page contains garbage. So read the -+ * page now. -+ */ -+ dbg_gen("copied %d instead of %d, read page and repeat", -+ copied, len); -+ cancel_budget(c, page, ui, appending); -+ -+ /* -+ * Return 0 to force VFS to repeat the whole operation, or the -+ * error code if 'do_readpage()' failes. -+ */ -+ copied = do_readpage(page); -+ goto out; -+ } -+ -+ if (!PagePrivate(page)) { -+ SetPagePrivate(page); -+ atomic_long_inc(&c->dirty_pg_cnt); -+ __set_page_dirty_nobuffers(page); -+ } -+ -+ if (appending) { -+ i_size_write(inode, end_pos); -+ ui->ui_size = end_pos; -+ /* -+ * Note, we do not set @I_DIRTY_PAGES (which means that the -+ * inode has dirty pages), this has been done in -+ * '__set_page_dirty_nobuffers()'. -+ */ -+ __mark_inode_dirty(inode, I_DIRTY_DATASYNC); -+ ubifs_assert(mutex_is_locked(&ui->ui_mutex)); -+ mutex_unlock(&ui->ui_mutex); -+ } -+ -+out: -+ unlock_page(page); -+ page_cache_release(page); -+ return copied; -+} -+ -+/** -+ * populate_page - copy data nodes into a page for bulk-read. -+ * @c: UBIFS file-system description object -+ * @page: page -+ * @bu: bulk-read information -+ * @n: next zbranch slot -+ * -+ * This function returns %0 on success and a negative error code on failure. 
-+ */ -+static int populate_page(struct ubifs_info *c, struct page *page, -+ struct bu_info *bu, int *n) -+{ -+ int i = 0, nn = *n, offs = bu->zbranch[0].offs, hole = 0, read = 0; -+ struct inode *inode = page->mapping->host; -+ loff_t i_size = i_size_read(inode); -+ unsigned int page_block; -+ void *addr, *zaddr; -+ pgoff_t end_index; -+ -+ dbg_gen("ino %lu, pg %lu, i_size %lld, flags %#lx", -+ inode->i_ino, page->index, i_size, page->flags); -+ -+ addr = zaddr = kmap(page); -+ -+ end_index = (i_size - 1) >> PAGE_CACHE_SHIFT; -+ if (!i_size || page->index > end_index) { -+ hole = 1; -+ memset(addr, 0, PAGE_CACHE_SIZE); -+ goto out_hole; -+ } -+ -+ page_block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT; -+ while (1) { -+ int err, len, out_len, dlen; -+ -+ if (nn >= bu->cnt) { -+ hole = 1; -+ memset(addr, 0, UBIFS_BLOCK_SIZE); -+ } else if (key_block(c, &bu->zbranch[nn].key) == page_block) { -+ struct ubifs_data_node *dn; -+ -+ dn = bu->buf + (bu->zbranch[nn].offs - offs); -+ -+ ubifs_assert(le64_to_cpu(dn->ch.sqnum) > -+ ubifs_inode(inode)->creat_sqnum); -+ -+ len = le32_to_cpu(dn->size); -+ if (len <= 0 || len > UBIFS_BLOCK_SIZE) -+ goto out_err; -+ -+ dlen = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ; -+ out_len = UBIFS_BLOCK_SIZE; -+ err = ubifs_decompress(&dn->data, dlen, addr, &out_len, -+ le16_to_cpu(dn->compr_type)); -+ if (err || len != out_len) -+ goto out_err; -+ -+ if (len < UBIFS_BLOCK_SIZE) -+ memset(addr + len, 0, UBIFS_BLOCK_SIZE - len); -+ -+ nn += 1; -+ read = (i << UBIFS_BLOCK_SHIFT) + len; -+ } else if (key_block(c, &bu->zbranch[nn].key) < page_block) { -+ nn += 1; -+ continue; -+ } else { -+ hole = 1; -+ memset(addr, 0, UBIFS_BLOCK_SIZE); -+ } -+ if (++i >= UBIFS_BLOCKS_PER_PAGE) -+ break; -+ addr += UBIFS_BLOCK_SIZE; -+ page_block += 1; -+ } -+ -+ if (end_index == page->index) { -+ int len = i_size & (PAGE_CACHE_SIZE - 1); -+ -+ if (len && len < read) -+ memset(zaddr + len, 0, read - len); -+ } -+ -+out_hole: -+ if (hole) { -+ 
SetPageChecked(page); -+ dbg_gen("hole"); -+ } -+ -+ SetPageUptodate(page); -+ ClearPageError(page); -+ flush_dcache_page(page); -+ kunmap(page); -+ *n = nn; -+ return 0; -+ -+out_err: -+ ClearPageUptodate(page); -+ SetPageError(page); -+ flush_dcache_page(page); -+ kunmap(page); -+ ubifs_err("bad data node (block %u, inode %lu)", -+ page_block, inode->i_ino); -+ return -EINVAL; -+} -+ -+/** -+ * ubifs_do_bulk_read - do bulk-read. -+ * @c: UBIFS file-system description object -+ * @bu: bulk-read information -+ * @page1: first page to read -+ * -+ * This function returns %1 if the bulk-read is done, otherwise %0 is returned. -+ */ -+static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu, -+ struct page *page1) -+{ -+ pgoff_t offset = page1->index, end_index; -+ struct address_space *mapping = page1->mapping; -+ struct inode *inode = mapping->host; -+ struct ubifs_inode *ui = ubifs_inode(inode); -+ int err, page_idx, page_cnt, ret = 0, n = 0; -+ int allocate = bu->buf ? 0 : 1; -+ loff_t isize; -+ -+ err = ubifs_tnc_get_bu_keys(c, bu); -+ if (err) -+ goto out_warn; -+ -+ if (bu->eof) { -+ /* Turn off bulk-read at the end of the file */ -+ ui->read_in_a_row = 1; -+ ui->bulk_read = 0; -+ } -+ -+ page_cnt = bu->blk_cnt >> UBIFS_BLOCKS_PER_PAGE_SHIFT; -+ if (!page_cnt) { -+ /* -+ * This happens when there are multiple blocks per page and the -+ * blocks for the first page we are looking for, are not -+ * together. If all the pages were like this, bulk-read would -+ * reduce performance, so we turn it off for a while. -+ */ -+ goto out_bu_off; -+ } -+ -+ if (bu->cnt) { -+ if (allocate) { -+ /* -+ * Allocate bulk-read buffer depending on how many data -+ * nodes we are going to read. 
-+ */ -+ bu->buf_len = bu->zbranch[bu->cnt - 1].offs + -+ bu->zbranch[bu->cnt - 1].len - -+ bu->zbranch[0].offs; -+ ubifs_assert(bu->buf_len > 0); -+ ubifs_assert(bu->buf_len <= c->leb_size); -+ bu->buf = kmalloc(bu->buf_len, GFP_NOFS | __GFP_NOWARN); -+ if (!bu->buf) -+ goto out_bu_off; -+ } -+ -+ err = ubifs_tnc_bulk_read(c, bu); -+ if (err) -+ goto out_warn; -+ } -+ -+ err = populate_page(c, page1, bu, &n); -+ if (err) -+ goto out_warn; -+ -+ unlock_page(page1); -+ ret = 1; -+ -+ isize = i_size_read(inode); -+ if (isize == 0) -+ goto out_free; -+ end_index = ((isize - 1) >> PAGE_CACHE_SHIFT); -+ -+ for (page_idx = 1; page_idx < page_cnt; page_idx++) { -+ pgoff_t page_offset = offset + page_idx; -+ struct page *page; -+ -+ if (page_offset > end_index) -+ break; -+ page = find_or_create_page(mapping, page_offset, -+ GFP_NOFS | __GFP_COLD); -+ if (!page) -+ break; -+ if (!PageUptodate(page)) -+ err = populate_page(c, page, bu, &n); -+ unlock_page(page); -+ page_cache_release(page); -+ if (err) -+ break; -+ } -+ -+ ui->last_page_read = offset + page_idx - 1; -+ -+out_free: -+ if (allocate) -+ kfree(bu->buf); -+ return ret; -+ -+out_warn: -+ ubifs_warn("ignoring error %d and skipping bulk-read", err); -+ goto out_free; -+ -+out_bu_off: -+ ui->read_in_a_row = ui->bulk_read = 0; -+ goto out_free; -+} -+ -+/** -+ * ubifs_bulk_read - determine whether to bulk-read and, if so, do it. -+ * @page: page from which to start bulk-read. -+ * -+ * Some flash media are capable of reading sequentially at faster rates. UBIFS -+ * bulk-read facility is designed to take advantage of that, by reading in one -+ * go consecutive data nodes that are also located consecutively in the same -+ * LEB. This function returns %1 if a bulk-read is done and %0 otherwise. 
-+ */ -+static int ubifs_bulk_read(struct page *page) -+{ -+ struct inode *inode = page->mapping->host; -+ struct ubifs_info *c = inode->i_sb->s_fs_info; -+ struct ubifs_inode *ui = ubifs_inode(inode); -+ pgoff_t index = page->index, last_page_read = ui->last_page_read; -+ struct bu_info *bu; -+ int err = 0, allocated = 0; -+ -+ ui->last_page_read = index; -+ if (!c->bulk_read) -+ return 0; -+ -+ /* -+ * Bulk-read is protected by @ui->ui_mutex, but it is an optimization, -+ * so don't bother if we cannot lock the mutex. -+ */ -+ if (!mutex_trylock(&ui->ui_mutex)) -+ return 0; -+ -+ if (index != last_page_read + 1) { -+ /* Turn off bulk-read if we stop reading sequentially */ -+ ui->read_in_a_row = 1; -+ if (ui->bulk_read) -+ ui->bulk_read = 0; -+ goto out_unlock; -+ } -+ -+ if (!ui->bulk_read) { -+ ui->read_in_a_row += 1; -+ if (ui->read_in_a_row < 3) -+ goto out_unlock; -+ /* Three reads in a row, so switch on bulk-read */ -+ ui->bulk_read = 1; -+ } -+ -+ /* -+ * If possible, try to use pre-allocated bulk-read information, which -+ * is protected by @c->bu_mutex. 
-+ */ -+ if (mutex_trylock(&c->bu_mutex)) -+ bu = &c->bu; -+ else { -+ bu = kmalloc(sizeof(struct bu_info), GFP_NOFS | __GFP_NOWARN); -+ if (!bu) -+ goto out_unlock; -+ -+ bu->buf = NULL; -+ allocated = 1; -+ } -+ -+ bu->buf_len = c->max_bu_buf_len; -+ data_key_init(c, &bu->key, inode->i_ino, -+ page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT); -+ err = ubifs_do_bulk_read(c, bu, page); -+ -+ if (!allocated) -+ mutex_unlock(&c->bu_mutex); -+ else -+ kfree(bu); -+ -+out_unlock: -+ mutex_unlock(&ui->ui_mutex); -+ return err; -+} -+ -+static int ubifs_readpage(struct file *file, struct page *page) -+{ -+ if (ubifs_bulk_read(page)) -+ return 0; -+ do_readpage(page); -+ unlock_page(page); -+ return 0; -+} -+ -+static int do_writepage(struct page *page, int len) -+{ -+ int err = 0, i, blen; -+ unsigned int block; -+ void *addr; -+ union ubifs_key key; -+ struct inode *inode = page->mapping->host; -+ struct ubifs_info *c = inode->i_sb->s_fs_info; -+ -+#ifdef UBIFS_DEBUG -+ spin_lock(&ui->ui_lock); -+ ubifs_assert(page->index <= ui->synced_i_size << PAGE_CACHE_SIZE); -+ spin_unlock(&ui->ui_lock); -+#endif -+ -+ /* Update radix tree tags */ -+ set_page_writeback(page); -+ -+ addr = kmap(page); -+ block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT; -+ i = 0; -+ while (len) { -+ blen = min_t(int, len, UBIFS_BLOCK_SIZE); -+ data_key_init(c, &key, inode->i_ino, block); -+ err = ubifs_jnl_write_data(c, inode, &key, addr, blen); -+ if (err) -+ break; -+ if (++i >= UBIFS_BLOCKS_PER_PAGE) -+ break; -+ block += 1; -+ addr += blen; -+ len -= blen; -+ } -+ if (err) { -+ SetPageError(page); -+ ubifs_err("cannot write page %lu of inode %lu, error %d", -+ page->index, inode->i_ino, err); -+ ubifs_ro_mode(c, err); -+ } -+ -+ ubifs_assert(PagePrivate(page)); -+ if (PageChecked(page)) -+ release_new_page_budget(c); -+ else -+ release_existing_page_budget(c); -+ -+ atomic_long_dec(&c->dirty_pg_cnt); -+ ClearPagePrivate(page); -+ ClearPageChecked(page); -+ -+ kunmap(page); -+ unlock_page(page); -+ 
end_page_writeback(page); -+ return err; -+} -+ -+/* -+ * When writing-back dirty inodes, VFS first writes-back pages belonging to the -+ * inode, then the inode itself. For UBIFS this may cause a problem. Consider a -+ * situation when a we have an inode with size 0, then a megabyte of data is -+ * appended to the inode, then write-back starts and flushes some amount of the -+ * dirty pages, the journal becomes full, commit happens and finishes, and then -+ * an unclean reboot happens. When the file system is mounted next time, the -+ * inode size would still be 0, but there would be many pages which are beyond -+ * the inode size, they would be indexed and consume flash space. Because the -+ * journal has been committed, the replay would not be able to detect this -+ * situation and correct the inode size. This means UBIFS would have to scan -+ * whole index and correct all inode sizes, which is long an unacceptable. -+ * -+ * To prevent situations like this, UBIFS writes pages back only if they are -+ * within last synchronized inode size, i.e. the the size which has been -+ * written to the flash media last time. Otherwise, UBIFS forces inode -+ * write-back, thus making sure the on-flash inode contains current inode size, -+ * and then keeps writing pages back. -+ * -+ * Some locking issues explanation. 'ubifs_writepage()' first is called with -+ * the page locked, and it locks @ui_mutex. However, write-back does take inode -+ * @i_mutex, which means other VFS operations may be run on this inode at the -+ * same time. And the problematic one is truncation to smaller size, from where -+ * we have to call 'vmtruncate()', which first changes @inode->i_size, then -+ * drops the truncated pages. And while dropping the pages, it takes the page -+ * lock. This means that 'do_truncation()' cannot call 'vmtruncate()' with -+ * @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. 
This -+ * means that @inode->i_size is changed while @ui_mutex is unlocked. -+ * -+ * But in 'ubifs_writepage()' we have to guarantee that we do not write beyond -+ * inode size. How do we do this if @inode->i_size may became smaller while we -+ * are in the middle of 'ubifs_writepage()'? The UBIFS solution is the -+ * @ui->ui_isize "shadow" field which UBIFS uses instead of @inode->i_size -+ * internally and updates it under @ui_mutex. -+ * -+ * Q: why we do not worry that if we race with truncation, we may end up with a -+ * situation when the inode is truncated while we are in the middle of -+ * 'do_writepage()', so we do write beyond inode size? -+ * A: If we are in the middle of 'do_writepage()', truncation would be locked -+ * on the page lock and it would not write the truncated inode node to the -+ * journal before we have finished. -+ */ -+static int ubifs_writepage(struct page *page, struct writeback_control *wbc) -+{ -+ struct inode *inode = page->mapping->host; -+ struct ubifs_inode *ui = ubifs_inode(inode); -+ loff_t i_size = i_size_read(inode), synced_i_size; -+ pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; -+ int err, len = i_size & (PAGE_CACHE_SIZE - 1); -+ void *kaddr; -+ -+ dbg_gen("ino %lu, pg %lu, pg flags %#lx", -+ inode->i_ino, page->index, page->flags); -+ ubifs_assert(PagePrivate(page)); -+ -+ /* Is the page fully outside @i_size? (truncate in progress) */ -+ if (page->index > end_index || (page->index == end_index && !len)) { -+ err = 0; -+ goto out_unlock; -+ } -+ -+ spin_lock(&ui->ui_lock); -+ synced_i_size = ui->synced_i_size; -+ spin_unlock(&ui->ui_lock); -+ -+ /* Is the page fully inside @i_size? 
*/ -+ if (page->index < end_index) { -+ if (page->index >= synced_i_size >> PAGE_CACHE_SHIFT) { -+ err = inode->i_sb->s_op->write_inode(inode, 1); -+ if (err) -+ goto out_unlock; -+ /* -+ * The inode has been written, but the write-buffer has -+ * not been synchronized, so in case of an unclean -+ * reboot we may end up with some pages beyond inode -+ * size, but they would be in the journal (because -+ * commit flushes write buffers) and recovery would deal -+ * with this. -+ */ -+ } -+ return do_writepage(page, PAGE_CACHE_SIZE); -+ } -+ -+ /* -+ * The page straddles @i_size. It must be zeroed out on each and every -+ * writepage invocation because it may be mmapped. "A file is mapped -+ * in multiples of the page size. For a file that is not a multiple of -+ * the page size, the remaining memory is zeroed when mapped, and -+ * writes to that region are not written out to the file." -+ */ -+ kaddr = kmap_atomic(page, KM_USER0); -+ memset(kaddr + len, 0, PAGE_CACHE_SIZE - len); -+ flush_dcache_page(page); -+ kunmap_atomic(kaddr, KM_USER0); -+ -+ if (i_size > synced_i_size) { -+ err = inode->i_sb->s_op->write_inode(inode, 1); -+ if (err) -+ goto out_unlock; -+ } -+ -+ return do_writepage(page, len); -+ -+out_unlock: -+ unlock_page(page); -+ return err; -+} -+ -+/** -+ * do_attr_changes - change inode attributes. 
-+ * @inode: inode to change attributes for -+ * @attr: describes attributes to change -+ */ -+static void do_attr_changes(struct inode *inode, const struct iattr *attr) -+{ -+ if (attr->ia_valid & ATTR_UID) -+ inode->i_uid = attr->ia_uid; -+ if (attr->ia_valid & ATTR_GID) -+ inode->i_gid = attr->ia_gid; -+ if (attr->ia_valid & ATTR_ATIME) -+ inode->i_atime = timespec_trunc(attr->ia_atime, -+ inode->i_sb->s_time_gran); -+ if (attr->ia_valid & ATTR_MTIME) -+ inode->i_mtime = timespec_trunc(attr->ia_mtime, -+ inode->i_sb->s_time_gran); -+ if (attr->ia_valid & ATTR_CTIME) -+ inode->i_ctime = timespec_trunc(attr->ia_ctime, -+ inode->i_sb->s_time_gran); -+ if (attr->ia_valid & ATTR_MODE) { -+ umode_t mode = attr->ia_mode; -+ -+ if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) -+ mode &= ~S_ISGID; -+ inode->i_mode = mode; -+ } -+} -+ -+/** -+ * do_truncation - truncate an inode. -+ * @c: UBIFS file-system description object -+ * @inode: inode to truncate -+ * @attr: inode attribute changes description -+ * -+ * This function implements VFS '->setattr()' call when the inode is truncated -+ * to a smaller size. Returns zero in case of success and a negative error code -+ * in case of failure. -+ */ -+static int do_truncation(struct ubifs_info *c, struct inode *inode, -+ const struct iattr *attr) -+{ -+ int err; -+ struct ubifs_budget_req req; -+ loff_t old_size = inode->i_size, new_size = attr->ia_size; -+ int offset = new_size & (UBIFS_BLOCK_SIZE - 1), budgeted = 1; -+ struct ubifs_inode *ui = ubifs_inode(inode); -+ -+ dbg_gen("ino %lu, size %lld -> %lld", inode->i_ino, old_size, new_size); -+ memset(&req, 0, sizeof(struct ubifs_budget_req)); -+ -+ /* -+ * If this is truncation to a smaller size, and we do not truncate on a -+ * block boundary, budget for changing one data block, because the last -+ * block will be re-written. 
-+ */ -+ if (new_size & (UBIFS_BLOCK_SIZE - 1)) -+ req.dirtied_page = 1; -+ -+ req.dirtied_ino = 1; -+ /* A funny way to budget for truncation node */ -+ req.dirtied_ino_d = UBIFS_TRUN_NODE_SZ; -+ err = ubifs_budget_space(c, &req); -+ if (err) { -+ /* -+ * Treat truncations to zero as deletion and always allow them, -+ * just like we do for '->unlink()'. -+ */ -+ if (new_size || err != -ENOSPC) -+ return err; -+ budgeted = 0; -+ } -+ -+ err = vmtruncate(inode, new_size); -+ if (err) -+ goto out_budg; -+ -+ if (offset) { -+ pgoff_t index = new_size >> PAGE_CACHE_SHIFT; -+ struct page *page; -+ -+ page = find_lock_page(inode->i_mapping, index); -+ if (page) { -+ if (PageDirty(page)) { -+ /* -+ * 'ubifs_jnl_truncate()' will try to truncate -+ * the last data node, but it contains -+ * out-of-date data because the page is dirty. -+ * Write the page now, so that -+ * 'ubifs_jnl_truncate()' will see an already -+ * truncated (and up to date) data node. -+ */ -+ ubifs_assert(PagePrivate(page)); -+ -+ clear_page_dirty_for_io(page); -+ if (UBIFS_BLOCKS_PER_PAGE_SHIFT) -+ offset = new_size & -+ (PAGE_CACHE_SIZE - 1); -+ err = do_writepage(page, offset); -+ page_cache_release(page); -+ if (err) -+ goto out_budg; -+ /* -+ * We could now tell 'ubifs_jnl_truncate()' not -+ * to read the last block. -+ */ -+ } else { -+ /* -+ * We could 'kmap()' the page and pass the data -+ * to 'ubifs_jnl_truncate()' to save it from -+ * having to read it. 
-+ */ -+ unlock_page(page); -+ page_cache_release(page); -+ } -+ } -+ } -+ -+ mutex_lock(&ui->ui_mutex); -+ ui->ui_size = inode->i_size; -+ /* Truncation changes inode [mc]time */ -+ inode->i_mtime = inode->i_ctime = ubifs_current_time(inode); -+ /* The other attributes may be changed at the same time as well */ -+ do_attr_changes(inode, attr); -+ -+ err = ubifs_jnl_truncate(c, inode, old_size, new_size); -+ mutex_unlock(&ui->ui_mutex); -+out_budg: -+ if (budgeted) -+ ubifs_release_budget(c, &req); -+ else { -+ c->nospace = c->nospace_rp = 0; -+ smp_wmb(); -+ } -+ return err; -+} -+ -+/** -+ * do_setattr - change inode attributes. -+ * @c: UBIFS file-system description object -+ * @inode: inode to change attributes for -+ * @attr: inode attribute changes description -+ * -+ * This function implements VFS '->setattr()' call for all cases except -+ * truncations to smaller size. Returns zero in case of success and a negative -+ * error code in case of failure. -+ */ -+static int do_setattr(struct ubifs_info *c, struct inode *inode, -+ const struct iattr *attr) -+{ -+ int err, release; -+ loff_t new_size = attr->ia_size; -+ struct ubifs_inode *ui = ubifs_inode(inode); -+ struct ubifs_budget_req req = { .dirtied_ino = 1, -+ .dirtied_ino_d = ALIGN(ui->data_len, 8) }; -+ -+ err = ubifs_budget_space(c, &req); -+ if (err) -+ return err; -+ -+ if (attr->ia_valid & ATTR_SIZE) { -+ dbg_gen("size %lld -> %lld", inode->i_size, new_size); -+ err = vmtruncate(inode, new_size); -+ if (err) -+ goto out; -+ } -+ -+ mutex_lock(&ui->ui_mutex); -+ if (attr->ia_valid & ATTR_SIZE) { -+ /* Truncation changes inode [mc]time */ -+ inode->i_mtime = inode->i_ctime = ubifs_current_time(inode); -+ /* 'vmtruncate()' changed @i_size, update @ui_size */ -+ ui->ui_size = inode->i_size; -+ } -+ -+ do_attr_changes(inode, attr); -+ -+ release = ui->dirty; -+ if (attr->ia_valid & ATTR_SIZE) -+ /* -+ * Inode length changed, so we have to make sure -+ * @I_DIRTY_DATASYNC is set. 
-+ */ -+ __mark_inode_dirty(inode, I_DIRTY_SYNC | I_DIRTY_DATASYNC); -+ else -+ mark_inode_dirty_sync(inode); -+ mutex_unlock(&ui->ui_mutex); -+ -+ if (release) -+ ubifs_release_budget(c, &req); -+ if (IS_SYNC(inode)) -+ err = inode->i_sb->s_op->write_inode(inode, 1); -+ return err; -+ -+out: -+ ubifs_release_budget(c, &req); -+ return err; -+} -+ -+int ubifs_setattr(struct dentry *dentry, struct iattr *attr) -+{ -+ int err; -+ struct inode *inode = dentry->d_inode; -+ struct ubifs_info *c = inode->i_sb->s_fs_info; -+ -+ dbg_gen("ino %lu, mode %#x, ia_valid %#x", -+ inode->i_ino, inode->i_mode, attr->ia_valid); -+ err = inode_change_ok(inode, attr); -+ if (err) -+ return err; -+ -+ err = dbg_check_synced_i_size(inode); -+ if (err) -+ return err; -+ -+ if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size < inode->i_size) -+ /* Truncation to a smaller size */ -+ err = do_truncation(c, inode, attr); -+ else -+ err = do_setattr(c, inode, attr); -+ -+ return err; -+} -+ -+static void ubifs_invalidatepage(struct page *page, unsigned long offset) -+{ -+ struct inode *inode = page->mapping->host; -+ struct ubifs_info *c = inode->i_sb->s_fs_info; -+ -+ ubifs_assert(PagePrivate(page)); -+ if (offset) -+ /* Partial page remains dirty */ -+ return; -+ -+ if (PageChecked(page)) -+ release_new_page_budget(c); -+ else -+ release_existing_page_budget(c); -+ -+ atomic_long_dec(&c->dirty_pg_cnt); -+ ClearPagePrivate(page); -+ ClearPageChecked(page); -+} -+ -+static void *ubifs_follow_link(struct dentry *dentry, struct nameidata *nd) -+{ -+ struct ubifs_inode *ui = ubifs_inode(dentry->d_inode); -+ -+ nd_set_link(nd, ui->data); -+ return NULL; -+} -+ -+int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync) -+{ -+ struct inode *inode = dentry->d_inode; -+ struct ubifs_info *c = inode->i_sb->s_fs_info; -+ int err; -+ -+ dbg_gen("syncing inode %lu", inode->i_ino); -+ -+ /* -+ * VFS has already synchronized dirty pages for this inode. 
Synchronize -+ * the inode unless this is a 'datasync()' call. -+ */ -+ if (!datasync || (inode->i_state & I_DIRTY_DATASYNC)) { -+ err = inode->i_sb->s_op->write_inode(inode, 1); -+ if (err) -+ return err; -+ } -+ -+ /* -+ * Nodes related to this inode may still sit in a write-buffer. Flush -+ * them. -+ */ -+ err = ubifs_sync_wbufs_by_inode(c, inode); -+ if (err) -+ return err; -+ -+ return 0; -+} -+ -+/** -+ * mctime_update_needed - check if mtime or ctime update is needed. -+ * @inode: the inode to do the check for -+ * @now: current time -+ * -+ * This helper function checks if the inode mtime/ctime should be updated or -+ * not. If current values of the time-stamps are within the UBIFS inode time -+ * granularity, they are not updated. This is an optimization. -+ */ -+static inline int mctime_update_needed(struct inode *inode, -+ struct timespec *now) -+{ -+ if (!timespec_equal(&inode->i_mtime, now) || -+ !timespec_equal(&inode->i_ctime, now)) -+ return 1; -+ return 0; -+} -+ -+/** -+ * update_ctime - update mtime and ctime of an inode. -+ * @c: UBIFS file-system description object -+ * @inode: inode to update -+ * -+ * This function updates mtime and ctime of the inode if it is not equivalent to -+ * current time. Returns zero in case of success and a negative error code in -+ * case of failure. 
-+ */ -+static int update_mctime(struct ubifs_info *c, struct inode *inode) -+{ -+ struct timespec now = ubifs_current_time(inode); -+ struct ubifs_inode *ui = ubifs_inode(inode); -+ -+ if (mctime_update_needed(inode, &now)) { -+ int err, release; -+ struct ubifs_budget_req req = { .dirtied_ino = 1, -+ .dirtied_ino_d = ALIGN(ui->data_len, 8) }; -+ -+ err = ubifs_budget_space(c, &req); -+ if (err) -+ return err; -+ -+ mutex_lock(&ui->ui_mutex); -+ inode->i_mtime = inode->i_ctime = ubifs_current_time(inode); -+ release = ui->dirty; -+ mark_inode_dirty_sync(inode); -+ mutex_unlock(&ui->ui_mutex); -+ if (release) -+ ubifs_release_budget(c, &req); -+ } -+ -+ return 0; -+} -+ -+static ssize_t ubifs_aio_write(struct kiocb *iocb, const struct iovec *iov, -+ unsigned long nr_segs, loff_t pos) -+{ -+ int err; -+ ssize_t ret; -+ struct inode *inode = iocb->ki_filp->f_mapping->host; -+ struct ubifs_info *c = inode->i_sb->s_fs_info; -+ -+ err = update_mctime(c, inode); -+ if (err) -+ return err; -+ -+ ret = generic_file_aio_write(iocb, iov, nr_segs, pos); -+ if (ret < 0) -+ return ret; -+ -+ if (ret > 0 && (IS_SYNC(inode) || iocb->ki_filp->f_flags & O_SYNC)) { -+ err = ubifs_sync_wbufs_by_inode(c, inode); -+ if (err) -+ return err; -+ } -+ -+ return ret; -+} -+ -+static int ubifs_set_page_dirty(struct page *page) -+{ -+ int ret; -+ -+ ret = __set_page_dirty_nobuffers(page); -+ /* -+ * An attempt to dirty a page without budgeting for it - should not -+ * happen. -+ */ -+ ubifs_assert(ret == 0); -+ return ret; -+} -+ -+static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags) -+{ -+ /* -+ * An attempt to release a dirty page without budgeting for it - should -+ * not happen. -+ */ -+ if (PageWriteback(page)) -+ return 0; -+ ubifs_assert(PagePrivate(page)); -+ ubifs_assert(0); -+ ClearPagePrivate(page); -+ ClearPageChecked(page); -+ return 1; -+} -+ -+/* -+ * mmap()d file has taken write protection fault and is being made -+ * writable. 
UBIFS must ensure page is budgeted for. -+ */ -+static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) -+{ -+ struct inode *inode = vma->vm_file->f_path.dentry->d_inode; -+ struct ubifs_info *c = inode->i_sb->s_fs_info; -+ struct timespec now = ubifs_current_time(inode); -+ struct ubifs_budget_req req = { .new_page = 1 }; -+ int err, update_time; -+ -+ dbg_gen("ino %lu, pg %lu, i_size %lld", inode->i_ino, page->index, -+ i_size_read(inode)); -+ ubifs_assert(!(inode->i_sb->s_flags & MS_RDONLY)); -+ -+ if (unlikely(c->ro_media)) -+ return -EROFS; -+ -+ /* -+ * We have not locked @page so far so we may budget for changing the -+ * page. Note, we cannot do this after we locked the page, because -+ * budgeting may cause write-back which would cause deadlock. -+ * -+ * At the moment we do not know whether the page is dirty or not, so we -+ * assume that it is not and budget for a new page. We could look at -+ * the @PG_private flag and figure this out, but we may race with write -+ * back and the page state may change by the time we lock it, so this -+ * would need additional care. We do not bother with this at the -+ * moment, although it might be good idea to do. Instead, we allocate -+ * budget for a new page and amend it later on if the page was in fact -+ * dirty. -+ * -+ * The budgeting-related logic of this function is similar to what we -+ * do in 'ubifs_write_begin()' and 'ubifs_write_end()'. Glance there -+ * for more comments. -+ */ -+ update_time = mctime_update_needed(inode, &now); -+ if (update_time) -+ /* -+ * We have to change inode time stamp which requires extra -+ * budgeting. 
-+ */ -+ req.dirtied_ino = 1; -+ -+ err = ubifs_budget_space(c, &req); -+ if (unlikely(err)) { -+ if (err == -ENOSPC) -+ ubifs_warn("out of space for mmapped file " -+ "(inode number %lu)", inode->i_ino); -+ return err; -+ } -+ -+ lock_page(page); -+ if (unlikely(page->mapping != inode->i_mapping || -+ page_offset(page) > i_size_read(inode))) { -+ /* Page got truncated out from underneath us */ -+ err = -EINVAL; -+ goto out_unlock; -+ } -+ -+ if (PagePrivate(page)) -+ release_new_page_budget(c); -+ else { -+ if (!PageChecked(page)) -+ ubifs_convert_page_budget(c); -+ SetPagePrivate(page); -+ atomic_long_inc(&c->dirty_pg_cnt); -+ __set_page_dirty_nobuffers(page); -+ } -+ -+ if (update_time) { -+ int release; -+ struct ubifs_inode *ui = ubifs_inode(inode); -+ -+ mutex_lock(&ui->ui_mutex); -+ inode->i_mtime = inode->i_ctime = ubifs_current_time(inode); -+ release = ui->dirty; -+ mark_inode_dirty_sync(inode); -+ mutex_unlock(&ui->ui_mutex); -+ if (release) -+ ubifs_release_dirty_inode_budget(c, ui); -+ } -+ -+ unlock_page(page); -+ return 0; -+ -+out_unlock: -+ unlock_page(page); -+ ubifs_release_budget(c, &req); -+ return err; -+} -+ -+static struct vm_operations_struct ubifs_file_vm_ops = { -+ .fault = filemap_fault, -+ .page_mkwrite = ubifs_vm_page_mkwrite, -+}; -+ -+static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma) -+{ -+ int err; -+ -+ /* 'generic_file_mmap()' takes care of NOMMU case */ -+ err = generic_file_mmap(file, vma); -+ if (err) -+ return err; -+ vma->vm_ops = &ubifs_file_vm_ops; -+ return 0; -+} -+ -+const struct address_space_operations ubifs_file_address_operations = { -+ .readpage = ubifs_readpage, -+ .writepage = ubifs_writepage, -+ .write_begin = ubifs_write_begin, -+ .write_end = ubifs_write_end, -+ .invalidatepage = ubifs_invalidatepage, -+ .set_page_dirty = ubifs_set_page_dirty, -+ .releasepage = ubifs_releasepage, -+}; -+ -+const struct inode_operations ubifs_file_inode_operations = { -+ .setattr = ubifs_setattr, -+ 
.getattr = ubifs_getattr, -+#ifdef CONFIG_UBIFS_FS_XATTR -+ .setxattr = ubifs_setxattr, -+ .getxattr = ubifs_getxattr, -+ .listxattr = ubifs_listxattr, -+ .removexattr = ubifs_removexattr, -+#endif -+}; -+ -+const struct inode_operations ubifs_symlink_inode_operations = { -+ .readlink = generic_readlink, -+ .follow_link = ubifs_follow_link, -+ .setattr = ubifs_setattr, -+ .getattr = ubifs_getattr, -+}; -+ -+const struct file_operations ubifs_file_operations = { -+ .llseek = generic_file_llseek, -+ .read = do_sync_read, -+ .write = do_sync_write, -+ .aio_read = generic_file_aio_read, -+ .aio_write = ubifs_aio_write, -+ .mmap = ubifs_file_mmap, -+ .fsync = ubifs_fsync, -+ .unlocked_ioctl = ubifs_ioctl, -+ .splice_read = generic_file_splice_read, -+ .splice_write = generic_file_splice_write, -+#ifdef CONFIG_COMPAT -+ .compat_ioctl = ubifs_compat_ioctl, -+#endif -+}; -diff -Nurd linux-2.6.24.orig/fs/ubifs/find.c linux-2.6.24/fs/ubifs/find.c ---- linux-2.6.24.orig/fs/ubifs/find.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.24/fs/ubifs/find.c 2009-04-17 09:49:28.000000000 +0200 -@@ -0,0 +1,977 @@ -+/* -+ * This file is part of UBIFS. -+ * -+ * Copyright (C) 2006-2008 Nokia Corporation. -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 as published by -+ * the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -+ * more details. 
-+ * -+ * You should have received a copy of the GNU General Public License along with -+ * this program; if not, write to the Free Software Foundation, Inc., 51 -+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ * -+ * Authors: Artem Bityutskiy (Битюцкий Артём) -+ * Adrian Hunter -+ */ -+ -+/* -+ * This file contains functions for finding LEBs for various purposes e.g. -+ * garbage collection. In general, lprops category heaps and lists are used -+ * for fast access, falling back on scanning the LPT as a last resort. -+ */ -+ -+#include <linux/sort.h> -+#include "ubifs.h" -+ -+/** -+ * struct scan_data - data provided to scan callback functions -+ * @min_space: minimum number of bytes for which to scan -+ * @pick_free: whether it is OK to scan for empty LEBs -+ * @lnum: LEB number found is returned here -+ * @exclude_index: whether to exclude index LEBs -+ */ -+struct scan_data { -+ int min_space; -+ int pick_free; -+ int lnum; -+ int exclude_index; -+}; -+ -+/** -+ * valuable - determine whether LEB properties are valuable. -+ * @c: the UBIFS file-system description object -+ * @lprops: LEB properties -+ * -+ * This function return %1 if the LEB properties should be added to the LEB -+ * properties tree in memory. Otherwise %0 is returned. -+ */ -+static int valuable(struct ubifs_info *c, const struct ubifs_lprops *lprops) -+{ -+ int n, cat = lprops->flags & LPROPS_CAT_MASK; -+ struct ubifs_lpt_heap *heap; -+ -+ switch (cat) { -+ case LPROPS_DIRTY: -+ case LPROPS_DIRTY_IDX: -+ case LPROPS_FREE: -+ heap = &c->lpt_heap[cat - 1]; -+ if (heap->cnt < heap->max_cnt) -+ return 1; -+ if (lprops->free + lprops->dirty >= c->dark_wm) -+ return 1; -+ return 0; -+ case LPROPS_EMPTY: -+ n = c->lst.empty_lebs + c->freeable_cnt - -+ c->lst.taken_empty_lebs; -+ if (n < c->lsave_cnt) -+ return 1; -+ return 0; -+ case LPROPS_FREEABLE: -+ return 1; -+ case LPROPS_FRDI_IDX: -+ return 1; -+ } -+ return 0; -+} -+ -+/** -+ * scan_for_dirty_cb - dirty space scan callback. 
-+ * @c: the UBIFS file-system description object -+ * @lprops: LEB properties to scan -+ * @in_tree: whether the LEB properties are in main memory -+ * @data: information passed to and from the caller of the scan -+ * -+ * This function returns a code that indicates whether the scan should continue -+ * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree -+ * in main memory (%LPT_SCAN_ADD), or whether the scan should stop -+ * (%LPT_SCAN_STOP). -+ */ -+static int scan_for_dirty_cb(struct ubifs_info *c, -+ const struct ubifs_lprops *lprops, int in_tree, -+ struct scan_data *data) -+{ -+ int ret = LPT_SCAN_CONTINUE; -+ -+ /* Exclude LEBs that are currently in use */ -+ if (lprops->flags & LPROPS_TAKEN) -+ return LPT_SCAN_CONTINUE; -+ /* Determine whether to add these LEB properties to the tree */ -+ if (!in_tree && valuable(c, lprops)) -+ ret |= LPT_SCAN_ADD; -+ /* Exclude LEBs with too little space */ -+ if (lprops->free + lprops->dirty < data->min_space) -+ return ret; -+ /* If specified, exclude index LEBs */ -+ if (data->exclude_index && lprops->flags & LPROPS_INDEX) -+ return ret; -+ /* If specified, exclude empty or freeable LEBs */ -+ if (lprops->free + lprops->dirty == c->leb_size) { -+ if (!data->pick_free) -+ return ret; -+ /* Exclude LEBs with too little dirty space (unless it is empty) */ -+ } else if (lprops->dirty < c->dead_wm) -+ return ret; -+ /* Finally we found space */ -+ data->lnum = lprops->lnum; -+ return LPT_SCAN_ADD | LPT_SCAN_STOP; -+} -+ -+/** -+ * scan_for_dirty - find a data LEB with free space. -+ * @c: the UBIFS file-system description object -+ * @min_space: minimum amount free plus dirty space the returned LEB has to -+ * have -+ * @pick_free: if it is OK to return a free or freeable LEB -+ * @exclude_index: whether to exclude index LEBs -+ * -+ * This function returns a pointer to the LEB properties found or a negative -+ * error code. 
-+ */ -+static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c, -+ int min_space, int pick_free, -+ int exclude_index) -+{ -+ const struct ubifs_lprops *lprops; -+ struct ubifs_lpt_heap *heap; -+ struct scan_data data; -+ int err, i; -+ -+ /* There may be an LEB with enough dirty space on the free heap */ -+ heap = &c->lpt_heap[LPROPS_FREE - 1]; -+ for (i = 0; i < heap->cnt; i++) { -+ lprops = heap->arr[i]; -+ if (lprops->free + lprops->dirty < min_space) -+ continue; -+ if (lprops->dirty < c->dead_wm) -+ continue; -+ return lprops; -+ } -+ /* -+ * A LEB may have fallen off of the bottom of the dirty heap, and ended -+ * up as uncategorized even though it has enough dirty space for us now, -+ * so check the uncategorized list. N.B. neither empty nor freeable LEBs -+ * can end up as uncategorized because they are kept on lists not -+ * finite-sized heaps. -+ */ -+ list_for_each_entry(lprops, &c->uncat_list, list) { -+ if (lprops->flags & LPROPS_TAKEN) -+ continue; -+ if (lprops->free + lprops->dirty < min_space) -+ continue; -+ if (exclude_index && (lprops->flags & LPROPS_INDEX)) -+ continue; -+ if (lprops->dirty < c->dead_wm) -+ continue; -+ return lprops; -+ } -+ /* We have looked everywhere in main memory, now scan the flash */ -+ if (c->pnodes_have >= c->pnode_cnt) -+ /* All pnodes are in memory, so skip scan */ -+ return ERR_PTR(-ENOSPC); -+ data.min_space = min_space; -+ data.pick_free = pick_free; -+ data.lnum = -1; -+ data.exclude_index = exclude_index; -+ err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, -+ (ubifs_lpt_scan_callback)scan_for_dirty_cb, -+ &data); -+ if (err) -+ return ERR_PTR(err); -+ ubifs_assert(data.lnum >= c->main_first && data.lnum < c->leb_cnt); -+ c->lscan_lnum = data.lnum; -+ lprops = ubifs_lpt_lookup_dirty(c, data.lnum); -+ if (IS_ERR(lprops)) -+ return lprops; -+ ubifs_assert(lprops->lnum == data.lnum); -+ ubifs_assert(lprops->free + lprops->dirty >= min_space); -+ ubifs_assert(lprops->dirty >= c->dead_wm || -+ 
(pick_free && -+ lprops->free + lprops->dirty == c->leb_size)); -+ ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); -+ ubifs_assert(!exclude_index || !(lprops->flags & LPROPS_INDEX)); -+ return lprops; -+} -+ -+/** -+ * ubifs_find_dirty_leb - find a dirty LEB for the Garbage Collector. -+ * @c: the UBIFS file-system description object -+ * @ret_lp: LEB properties are returned here on exit -+ * @min_space: minimum amount free plus dirty space the returned LEB has to -+ * have -+ * @pick_free: controls whether it is OK to pick empty or index LEBs -+ * -+ * This function tries to find a dirty logical eraseblock which has at least -+ * @min_space free and dirty space. It prefers to take an LEB from the dirty or -+ * dirty index heap, and it falls-back to LPT scanning if the heaps are empty -+ * or do not have an LEB which satisfies the @min_space criteria. -+ * -+ * Note, LEBs which have less than dead watermark of free + dirty space are -+ * never picked by this function. -+ * -+ * The additional @pick_free argument controls if this function has to return a -+ * free or freeable LEB if one is present. For example, GC must to set it to %1, -+ * when called from the journal space reservation function, because the -+ * appearance of free space may coincide with the loss of enough dirty space -+ * for GC to succeed anyway. -+ * -+ * In contrast, if the Garbage Collector is called from budgeting, it should -+ * just make free space, not return LEBs which are already free or freeable. -+ * -+ * In addition @pick_free is set to %2 by the recovery process in order to -+ * recover gc_lnum in which case an index LEB must not be returned. -+ * -+ * This function returns zero and the LEB properties of found dirty LEB in case -+ * of success, %-ENOSPC if no dirty LEB was found and a negative error code in -+ * case of other failures. The returned LEB is marked as "taken". 
-+ */ -+int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, -+ int min_space, int pick_free) -+{ -+ int err = 0, sum, exclude_index = pick_free == 2 ? 1 : 0; -+ const struct ubifs_lprops *lp = NULL, *idx_lp = NULL; -+ struct ubifs_lpt_heap *heap, *idx_heap; -+ -+ ubifs_get_lprops(c); -+ -+ if (pick_free) { -+ int lebs, rsvd_idx_lebs = 0; -+ -+ spin_lock(&c->space_lock); -+ lebs = c->lst.empty_lebs + c->idx_gc_cnt; -+ lebs += c->freeable_cnt - c->lst.taken_empty_lebs; -+ -+ /* -+ * Note, the index may consume more LEBs than have been reserved -+ * for it. It is OK because it might be consolidated by GC. -+ * But if the index takes fewer LEBs than it is reserved for it, -+ * this function must avoid picking those reserved LEBs. -+ */ -+ if (c->min_idx_lebs >= c->lst.idx_lebs) { -+ rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; -+ exclude_index = 1; -+ } -+ spin_unlock(&c->space_lock); -+ -+ /* Check if there are enough free LEBs for the index */ -+ if (rsvd_idx_lebs < lebs) { -+ /* OK, try to find an empty LEB */ -+ lp = ubifs_fast_find_empty(c); -+ if (lp) -+ goto found; -+ -+ /* Or a freeable LEB */ -+ lp = ubifs_fast_find_freeable(c); -+ if (lp) -+ goto found; -+ } else -+ /* -+ * We cannot pick free/freeable LEBs in the below code. -+ */ -+ pick_free = 0; -+ } else { -+ spin_lock(&c->space_lock); -+ exclude_index = (c->min_idx_lebs >= c->lst.idx_lebs); -+ spin_unlock(&c->space_lock); -+ } -+ -+ /* Look on the dirty and dirty index heaps */ -+ heap = &c->lpt_heap[LPROPS_DIRTY - 1]; -+ idx_heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1]; -+ -+ if (idx_heap->cnt && !exclude_index) { -+ idx_lp = idx_heap->arr[0]; -+ sum = idx_lp->free + idx_lp->dirty; -+ /* -+ * Since we reserve thrice as much space for the index than it -+ * actually takes, it does not make sense to pick indexing LEBs -+ * with less than, say, half LEB of dirty space. May be half is -+ * not the optimal boundary - this should be tested and -+ * checked. 
This boundary should determine how much we use -+ * in-the-gaps to consolidate the index comparing to how much -+ * we use garbage collector to consolidate it. The "half" -+ * criteria just feels to be fine. -+ */ -+ if (sum < min_space || sum < c->half_leb_size) -+ idx_lp = NULL; -+ } -+ -+ if (heap->cnt) { -+ lp = heap->arr[0]; -+ if (lp->dirty + lp->free < min_space) -+ lp = NULL; -+ } -+ -+ /* Pick the LEB with most space */ -+ if (idx_lp && lp) { -+ if (idx_lp->free + idx_lp->dirty >= lp->free + lp->dirty) -+ lp = idx_lp; -+ } else if (idx_lp && !lp) -+ lp = idx_lp; -+ -+ if (lp) { -+ ubifs_assert(lp->free + lp->dirty >= c->dead_wm); -+ goto found; -+ } -+ -+ /* Did not find a dirty LEB on the dirty heaps, have to scan */ -+ dbg_find("scanning LPT for a dirty LEB"); -+ lp = scan_for_dirty(c, min_space, pick_free, exclude_index); -+ if (IS_ERR(lp)) { -+ err = PTR_ERR(lp); -+ goto out; -+ } -+ ubifs_assert(lp->dirty >= c->dead_wm || -+ (pick_free && lp->free + lp->dirty == c->leb_size)); -+ -+found: -+ dbg_find("found LEB %d, free %d, dirty %d, flags %#x", -+ lp->lnum, lp->free, lp->dirty, lp->flags); -+ -+ lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC, -+ lp->flags | LPROPS_TAKEN, 0); -+ if (IS_ERR(lp)) { -+ err = PTR_ERR(lp); -+ goto out; -+ } -+ -+ memcpy(ret_lp, lp, sizeof(struct ubifs_lprops)); -+ -+out: -+ ubifs_release_lprops(c); -+ return err; -+} -+ -+/** -+ * scan_for_free_cb - free space scan callback. -+ * @c: the UBIFS file-system description object -+ * @lprops: LEB properties to scan -+ * @in_tree: whether the LEB properties are in main memory -+ * @data: information passed to and from the caller of the scan -+ * -+ * This function returns a code that indicates whether the scan should continue -+ * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree -+ * in main memory (%LPT_SCAN_ADD), or whether the scan should stop -+ * (%LPT_SCAN_STOP). 
-+ */ -+static int scan_for_free_cb(struct ubifs_info *c, -+ const struct ubifs_lprops *lprops, int in_tree, -+ struct scan_data *data) -+{ -+ int ret = LPT_SCAN_CONTINUE; -+ -+ /* Exclude LEBs that are currently in use */ -+ if (lprops->flags & LPROPS_TAKEN) -+ return LPT_SCAN_CONTINUE; -+ /* Determine whether to add these LEB properties to the tree */ -+ if (!in_tree && valuable(c, lprops)) -+ ret |= LPT_SCAN_ADD; -+ /* Exclude index LEBs */ -+ if (lprops->flags & LPROPS_INDEX) -+ return ret; -+ /* Exclude LEBs with too little space */ -+ if (lprops->free < data->min_space) -+ return ret; -+ /* If specified, exclude empty LEBs */ -+ if (!data->pick_free && lprops->free == c->leb_size) -+ return ret; -+ /* -+ * LEBs that have only free and dirty space must not be allocated -+ * because they may have been unmapped already or they may have data -+ * that is obsolete only because of nodes that are still sitting in a -+ * wbuf. -+ */ -+ if (lprops->free + lprops->dirty == c->leb_size && lprops->dirty > 0) -+ return ret; -+ /* Finally we found space */ -+ data->lnum = lprops->lnum; -+ return LPT_SCAN_ADD | LPT_SCAN_STOP; -+} -+ -+/** -+ * do_find_free_space - find a data LEB with free space. -+ * @c: the UBIFS file-system description object -+ * @min_space: minimum amount of free space required -+ * @pick_free: whether it is OK to scan for empty LEBs -+ * @squeeze: whether to try to find space in a non-empty LEB first -+ * -+ * This function returns a pointer to the LEB properties found or a negative -+ * error code. 
-+ */ -+static -+const struct ubifs_lprops *do_find_free_space(struct ubifs_info *c, -+ int min_space, int pick_free, -+ int squeeze) -+{ -+ const struct ubifs_lprops *lprops; -+ struct ubifs_lpt_heap *heap; -+ struct scan_data data; -+ int err, i; -+ -+ if (squeeze) { -+ lprops = ubifs_fast_find_free(c); -+ if (lprops && lprops->free >= min_space) -+ return lprops; -+ } -+ if (pick_free) { -+ lprops = ubifs_fast_find_empty(c); -+ if (lprops) -+ return lprops; -+ } -+ if (!squeeze) { -+ lprops = ubifs_fast_find_free(c); -+ if (lprops && lprops->free >= min_space) -+ return lprops; -+ } -+ /* There may be an LEB with enough free space on the dirty heap */ -+ heap = &c->lpt_heap[LPROPS_DIRTY - 1]; -+ for (i = 0; i < heap->cnt; i++) { -+ lprops = heap->arr[i]; -+ if (lprops->free >= min_space) -+ return lprops; -+ } -+ /* -+ * A LEB may have fallen off of the bottom of the free heap, and ended -+ * up as uncategorized even though it has enough free space for us now, -+ * so check the uncategorized list. N.B. neither empty nor freeable LEBs -+ * can end up as uncategorized because they are kept on lists not -+ * finite-sized heaps. 
-+ */ -+ list_for_each_entry(lprops, &c->uncat_list, list) { -+ if (lprops->flags & LPROPS_TAKEN) -+ continue; -+ if (lprops->flags & LPROPS_INDEX) -+ continue; -+ if (lprops->free >= min_space) -+ return lprops; -+ } -+ /* We have looked everywhere in main memory, now scan the flash */ -+ if (c->pnodes_have >= c->pnode_cnt) -+ /* All pnodes are in memory, so skip scan */ -+ return ERR_PTR(-ENOSPC); -+ data.min_space = min_space; -+ data.pick_free = pick_free; -+ data.lnum = -1; -+ err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, -+ (ubifs_lpt_scan_callback)scan_for_free_cb, -+ &data); -+ if (err) -+ return ERR_PTR(err); -+ ubifs_assert(data.lnum >= c->main_first && data.lnum < c->leb_cnt); -+ c->lscan_lnum = data.lnum; -+ lprops = ubifs_lpt_lookup_dirty(c, data.lnum); -+ if (IS_ERR(lprops)) -+ return lprops; -+ ubifs_assert(lprops->lnum == data.lnum); -+ ubifs_assert(lprops->free >= min_space); -+ ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); -+ ubifs_assert(!(lprops->flags & LPROPS_INDEX)); -+ return lprops; -+} -+ -+/** -+ * ubifs_find_free_space - find a data LEB with free space. -+ * @c: the UBIFS file-system description object -+ * @min_space: minimum amount of required free space -+ * @free: contains amount of free space in the LEB on exit -+ * @squeeze: whether to try to find space in a non-empty LEB first -+ * -+ * This function looks for an LEB with at least @min_space bytes of free space. -+ * It tries to find an empty LEB if possible. If no empty LEBs are available, -+ * this function searches for a non-empty data LEB. The returned LEB is marked -+ * as "taken". -+ * -+ * This function returns found LEB number in case of success, %-ENOSPC if it -+ * failed to find a LEB with @min_space bytes of free space and other a negative -+ * error codes in case of failure. 
-+ */ -+int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free, -+ int squeeze) -+{ -+ const struct ubifs_lprops *lprops; -+ int lebs, rsvd_idx_lebs, pick_free = 0, err, lnum, flags; -+ -+ dbg_find("min_space %d", min_space); -+ ubifs_get_lprops(c); -+ -+ /* Check if there are enough empty LEBs for commit */ -+ spin_lock(&c->space_lock); -+ if (c->min_idx_lebs > c->lst.idx_lebs) -+ rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; -+ else -+ rsvd_idx_lebs = 0; -+ lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - -+ c->lst.taken_empty_lebs; -+ if (rsvd_idx_lebs < lebs) -+ /* -+ * OK to allocate an empty LEB, but we still don't want to go -+ * looking for one if there aren't any. -+ */ -+ if (c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) { -+ pick_free = 1; -+ /* -+ * Because we release the space lock, we must account -+ * for this allocation here. After the LEB properties -+ * flags have been updated, we subtract one. Note, the -+ * result of this is that lprops also decreases -+ * @taken_empty_lebs in 'ubifs_change_lp()', so it is -+ * off by one for a short period of time which may -+ * introduce a small disturbance to budgeting -+ * calculations, but this is harmless because at the -+ * worst case this would make the budgeting subsystem -+ * be more pessimistic than needed. -+ * -+ * Fundamentally, this is about serialization of the -+ * budgeting and lprops subsystems. We could make the -+ * @space_lock a mutex and avoid dropping it before -+ * calling 'ubifs_change_lp()', but mutex is more -+ * heavy-weight, and we want budgeting to be as fast as -+ * possible. 
-+ */ -+ c->lst.taken_empty_lebs += 1; -+ } -+ spin_unlock(&c->space_lock); -+ -+ lprops = do_find_free_space(c, min_space, pick_free, squeeze); -+ if (IS_ERR(lprops)) { -+ err = PTR_ERR(lprops); -+ goto out; -+ } -+ -+ lnum = lprops->lnum; -+ flags = lprops->flags | LPROPS_TAKEN; -+ -+ lprops = ubifs_change_lp(c, lprops, LPROPS_NC, LPROPS_NC, flags, 0); -+ if (IS_ERR(lprops)) { -+ err = PTR_ERR(lprops); -+ goto out; -+ } -+ -+ if (pick_free) { -+ spin_lock(&c->space_lock); -+ c->lst.taken_empty_lebs -= 1; -+ spin_unlock(&c->space_lock); -+ } -+ -+ *free = lprops->free; -+ ubifs_release_lprops(c); -+ -+ if (*free == c->leb_size) { -+ /* -+ * Ensure that empty LEBs have been unmapped. They may not have -+ * been, for example, because of an unclean unmount. Also -+ * LEBs that were freeable LEBs (free + dirty == leb_size) will -+ * not have been unmapped. -+ */ -+ err = ubifs_leb_unmap(c, lnum); -+ if (err) -+ return err; -+ } -+ -+ dbg_find("found LEB %d, free %d", lnum, *free); -+ ubifs_assert(*free >= min_space); -+ return lnum; -+ -+out: -+ if (pick_free) { -+ spin_lock(&c->space_lock); -+ c->lst.taken_empty_lebs -= 1; -+ spin_unlock(&c->space_lock); -+ } -+ ubifs_release_lprops(c); -+ return err; -+} -+ -+/** -+ * scan_for_idx_cb - callback used by the scan for a free LEB for the index. -+ * @c: the UBIFS file-system description object -+ * @lprops: LEB properties to scan -+ * @in_tree: whether the LEB properties are in main memory -+ * @data: information passed to and from the caller of the scan -+ * -+ * This function returns a code that indicates whether the scan should continue -+ * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree -+ * in main memory (%LPT_SCAN_ADD), or whether the scan should stop -+ * (%LPT_SCAN_STOP). 
-+ */ -+static int scan_for_idx_cb(struct ubifs_info *c, -+ const struct ubifs_lprops *lprops, int in_tree, -+ struct scan_data *data) -+{ -+ int ret = LPT_SCAN_CONTINUE; -+ -+ /* Exclude LEBs that are currently in use */ -+ if (lprops->flags & LPROPS_TAKEN) -+ return LPT_SCAN_CONTINUE; -+ /* Determine whether to add these LEB properties to the tree */ -+ if (!in_tree && valuable(c, lprops)) -+ ret |= LPT_SCAN_ADD; -+ /* Exclude index LEBS */ -+ if (lprops->flags & LPROPS_INDEX) -+ return ret; -+ /* Exclude LEBs that cannot be made empty */ -+ if (lprops->free + lprops->dirty != c->leb_size) -+ return ret; -+ /* -+ * We are allocating for the index so it is safe to allocate LEBs with -+ * only free and dirty space, because write buffers are sync'd at commit -+ * start. -+ */ -+ data->lnum = lprops->lnum; -+ return LPT_SCAN_ADD | LPT_SCAN_STOP; -+} -+ -+/** -+ * scan_for_leb_for_idx - scan for a free LEB for the index. -+ * @c: the UBIFS file-system description object -+ */ -+static const struct ubifs_lprops *scan_for_leb_for_idx(struct ubifs_info *c) -+{ -+ struct ubifs_lprops *lprops; -+ struct scan_data data; -+ int err; -+ -+ data.lnum = -1; -+ err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, -+ (ubifs_lpt_scan_callback)scan_for_idx_cb, -+ &data); -+ if (err) -+ return ERR_PTR(err); -+ ubifs_assert(data.lnum >= c->main_first && data.lnum < c->leb_cnt); -+ c->lscan_lnum = data.lnum; -+ lprops = ubifs_lpt_lookup_dirty(c, data.lnum); -+ if (IS_ERR(lprops)) -+ return lprops; -+ ubifs_assert(lprops->lnum == data.lnum); -+ ubifs_assert(lprops->free + lprops->dirty == c->leb_size); -+ ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); -+ ubifs_assert(!(lprops->flags & LPROPS_INDEX)); -+ return lprops; -+} -+ -+/** -+ * ubifs_find_free_leb_for_idx - find a free LEB for the index. -+ * @c: the UBIFS file-system description object -+ * -+ * This function looks for a free LEB and returns that LEB number. The returned -+ * LEB is marked as "taken", "index". 
-+ * -+ * Only empty LEBs are allocated. This is for two reasons. First, the commit -+ * calculates the number of LEBs to allocate based on the assumption that they -+ * will be empty. Secondly, free space at the end of an index LEB is not -+ * guaranteed to be empty because it may have been used by the in-the-gaps -+ * method prior to an unclean unmount. -+ * -+ * If no LEB is found %-ENOSPC is returned. For other failures another negative -+ * error code is returned. -+ */ -+int ubifs_find_free_leb_for_idx(struct ubifs_info *c) -+{ -+ const struct ubifs_lprops *lprops; -+ int lnum = -1, err, flags; -+ -+ ubifs_get_lprops(c); -+ -+ lprops = ubifs_fast_find_empty(c); -+ if (!lprops) { -+ lprops = ubifs_fast_find_freeable(c); -+ if (!lprops) { -+ ubifs_assert(c->freeable_cnt == 0); -+ if (c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) { -+ lprops = scan_for_leb_for_idx(c); -+ if (IS_ERR(lprops)) { -+ err = PTR_ERR(lprops); -+ goto out; -+ } -+ } -+ } -+ } -+ -+ if (!lprops) { -+ err = -ENOSPC; -+ goto out; -+ } -+ -+ lnum = lprops->lnum; -+ -+ dbg_find("found LEB %d, free %d, dirty %d, flags %#x", -+ lnum, lprops->free, lprops->dirty, lprops->flags); -+ -+ flags = lprops->flags | LPROPS_TAKEN | LPROPS_INDEX; -+ lprops = ubifs_change_lp(c, lprops, c->leb_size, 0, flags, 0); -+ if (IS_ERR(lprops)) { -+ err = PTR_ERR(lprops); -+ goto out; -+ } -+ -+ ubifs_release_lprops(c); -+ -+ /* -+ * Ensure that empty LEBs have been unmapped. They may not have been, -+ * for example, because of an unclean unmount. Also LEBs that were -+ * freeable LEBs (free + dirty == leb_size) will not have been unmapped. 
-+ */ -+ err = ubifs_leb_unmap(c, lnum); -+ if (err) { -+ ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0, -+ LPROPS_TAKEN | LPROPS_INDEX, 0); -+ return err; -+ } -+ -+ return lnum; -+ -+out: -+ ubifs_release_lprops(c); -+ return err; -+} -+ -+static int cmp_dirty_idx(const struct ubifs_lprops **a, -+ const struct ubifs_lprops **b) -+{ -+ const struct ubifs_lprops *lpa = *a; -+ const struct ubifs_lprops *lpb = *b; -+ -+ return lpa->dirty + lpa->free - lpb->dirty - lpb->free; -+} -+ -+static void swap_dirty_idx(struct ubifs_lprops **a, struct ubifs_lprops **b, -+ int size) -+{ -+ struct ubifs_lprops *t = *a; -+ -+ *a = *b; -+ *b = t; -+} -+ -+/** -+ * ubifs_save_dirty_idx_lnums - save an array of the most dirty index LEB nos. -+ * @c: the UBIFS file-system description object -+ * -+ * This function is called each commit to create an array of LEB numbers of -+ * dirty index LEBs sorted in order of dirty and free space. This is used by -+ * the in-the-gaps method of TNC commit. -+ */ -+int ubifs_save_dirty_idx_lnums(struct ubifs_info *c) -+{ -+ int i; -+ -+ ubifs_get_lprops(c); -+ /* Copy the LPROPS_DIRTY_IDX heap */ -+ c->dirty_idx.cnt = c->lpt_heap[LPROPS_DIRTY_IDX - 1].cnt; -+ memcpy(c->dirty_idx.arr, c->lpt_heap[LPROPS_DIRTY_IDX - 1].arr, -+ sizeof(void *) * c->dirty_idx.cnt); -+ /* Sort it so that the dirtiest is now at the end */ -+ sort(c->dirty_idx.arr, c->dirty_idx.cnt, sizeof(void *), -+ (int (*)(const void *, const void *))cmp_dirty_idx, -+ (void (*)(void *, void *, int))swap_dirty_idx); -+ dbg_find("found %d dirty index LEBs", c->dirty_idx.cnt); -+ if (c->dirty_idx.cnt) -+ dbg_find("dirtiest index LEB is %d with dirty %d and free %d", -+ c->dirty_idx.arr[c->dirty_idx.cnt - 1]->lnum, -+ c->dirty_idx.arr[c->dirty_idx.cnt - 1]->dirty, -+ c->dirty_idx.arr[c->dirty_idx.cnt - 1]->free); -+ /* Replace the lprops pointers with LEB numbers */ -+ for (i = 0; i < c->dirty_idx.cnt; i++) -+ c->dirty_idx.arr[i] = (void *)(size_t)c->dirty_idx.arr[i]->lnum; -+ 
ubifs_release_lprops(c); -+ return 0; -+} -+ -+/** -+ * scan_dirty_idx_cb - callback used by the scan for a dirty index LEB. -+ * @c: the UBIFS file-system description object -+ * @lprops: LEB properties to scan -+ * @in_tree: whether the LEB properties are in main memory -+ * @data: information passed to and from the caller of the scan -+ * -+ * This function returns a code that indicates whether the scan should continue -+ * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree -+ * in main memory (%LPT_SCAN_ADD), or whether the scan should stop -+ * (%LPT_SCAN_STOP). -+ */ -+static int scan_dirty_idx_cb(struct ubifs_info *c, -+ const struct ubifs_lprops *lprops, int in_tree, -+ struct scan_data *data) -+{ -+ int ret = LPT_SCAN_CONTINUE; -+ -+ /* Exclude LEBs that are currently in use */ -+ if (lprops->flags & LPROPS_TAKEN) -+ return LPT_SCAN_CONTINUE; -+ /* Determine whether to add these LEB properties to the tree */ -+ if (!in_tree && valuable(c, lprops)) -+ ret |= LPT_SCAN_ADD; -+ /* Exclude non-index LEBs */ -+ if (!(lprops->flags & LPROPS_INDEX)) -+ return ret; -+ /* Exclude LEBs with too little space */ -+ if (lprops->free + lprops->dirty < c->min_idx_node_sz) -+ return ret; -+ /* Finally we found space */ -+ data->lnum = lprops->lnum; -+ return LPT_SCAN_ADD | LPT_SCAN_STOP; -+} -+ -+/** -+ * find_dirty_idx_leb - find a dirty index LEB. -+ * @c: the UBIFS file-system description object -+ * -+ * This function returns LEB number upon success and a negative error code upon -+ * failure. In particular, -ENOSPC is returned if a dirty index LEB is not -+ * found. -+ * -+ * Note that this function scans the entire LPT but it is called very rarely. 
-+ */ -+static int find_dirty_idx_leb(struct ubifs_info *c) -+{ -+ const struct ubifs_lprops *lprops; -+ struct ubifs_lpt_heap *heap; -+ struct scan_data data; -+ int err, i, ret; -+ -+ /* Check all structures in memory first */ -+ data.lnum = -1; -+ heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1]; -+ for (i = 0; i < heap->cnt; i++) { -+ lprops = heap->arr[i]; -+ ret = scan_dirty_idx_cb(c, lprops, 1, &data); -+ if (ret & LPT_SCAN_STOP) -+ goto found; -+ } -+ list_for_each_entry(lprops, &c->frdi_idx_list, list) { -+ ret = scan_dirty_idx_cb(c, lprops, 1, &data); -+ if (ret & LPT_SCAN_STOP) -+ goto found; -+ } -+ list_for_each_entry(lprops, &c->uncat_list, list) { -+ ret = scan_dirty_idx_cb(c, lprops, 1, &data); -+ if (ret & LPT_SCAN_STOP) -+ goto found; -+ } -+ if (c->pnodes_have >= c->pnode_cnt) -+ /* All pnodes are in memory, so skip scan */ -+ return -ENOSPC; -+ err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, -+ (ubifs_lpt_scan_callback)scan_dirty_idx_cb, -+ &data); -+ if (err) -+ return err; -+found: -+ ubifs_assert(data.lnum >= c->main_first && data.lnum < c->leb_cnt); -+ c->lscan_lnum = data.lnum; -+ lprops = ubifs_lpt_lookup_dirty(c, data.lnum); -+ if (IS_ERR(lprops)) -+ return PTR_ERR(lprops); -+ ubifs_assert(lprops->lnum == data.lnum); -+ ubifs_assert(lprops->free + lprops->dirty >= c->min_idx_node_sz); -+ ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); -+ ubifs_assert((lprops->flags & LPROPS_INDEX)); -+ -+ dbg_find("found dirty LEB %d, free %d, dirty %d, flags %#x", -+ lprops->lnum, lprops->free, lprops->dirty, lprops->flags); -+ -+ lprops = ubifs_change_lp(c, lprops, LPROPS_NC, LPROPS_NC, -+ lprops->flags | LPROPS_TAKEN, 0); -+ if (IS_ERR(lprops)) -+ return PTR_ERR(lprops); -+ -+ return lprops->lnum; -+} -+ -+/** -+ * get_idx_gc_leb - try to get a LEB number from trivial GC. 
-+ * @c: the UBIFS file-system description object -+ */ -+static int get_idx_gc_leb(struct ubifs_info *c) -+{ -+ const struct ubifs_lprops *lp; -+ int err, lnum; -+ -+ err = ubifs_get_idx_gc_leb(c); -+ if (err < 0) -+ return err; -+ lnum = err; -+ /* -+ * The LEB was due to be unmapped after the commit but -+ * it is needed now for this commit. -+ */ -+ lp = ubifs_lpt_lookup_dirty(c, lnum); -+ if (IS_ERR(lp)) -+ return PTR_ERR(lp); -+ lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC, -+ lp->flags | LPROPS_INDEX, -1); -+ if (IS_ERR(lp)) -+ return PTR_ERR(lp); -+ dbg_find("LEB %d, dirty %d and free %d flags %#x", -+ lp->lnum, lp->dirty, lp->free, lp->flags); -+ return lnum; -+} -+ -+/** -+ * find_dirtiest_idx_leb - find dirtiest index LEB from dirtiest array. -+ * @c: the UBIFS file-system description object -+ */ -+static int find_dirtiest_idx_leb(struct ubifs_info *c) -+{ -+ const struct ubifs_lprops *lp; -+ int lnum; -+ -+ while (1) { -+ if (!c->dirty_idx.cnt) -+ return -ENOSPC; -+ /* The lprops pointers were replaced by LEB numbers */ -+ lnum = (size_t)c->dirty_idx.arr[--c->dirty_idx.cnt]; -+ lp = ubifs_lpt_lookup(c, lnum); -+ if (IS_ERR(lp)) -+ return PTR_ERR(lp); -+ if ((lp->flags & LPROPS_TAKEN) || !(lp->flags & LPROPS_INDEX)) -+ continue; -+ lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC, -+ lp->flags | LPROPS_TAKEN, 0); -+ if (IS_ERR(lp)) -+ return PTR_ERR(lp); -+ break; -+ } -+ dbg_find("LEB %d, dirty %d and free %d flags %#x", lp->lnum, lp->dirty, -+ lp->free, lp->flags); -+ ubifs_assert(lp->flags | LPROPS_TAKEN); -+ ubifs_assert(lp->flags | LPROPS_INDEX); -+ return lnum; -+} -+ -+/** -+ * ubifs_find_dirty_idx_leb - try to find dirtiest index LEB as at last commit. -+ * @c: the UBIFS file-system description object -+ * -+ * This function attempts to find an untaken index LEB with the most free and -+ * dirty space that can be used without overwriting index nodes that were in the -+ * last index committed. 
-+ */ -+int ubifs_find_dirty_idx_leb(struct ubifs_info *c) -+{ -+ int err; -+ -+ ubifs_get_lprops(c); -+ -+ /* -+ * We made an array of the dirtiest index LEB numbers as at the start of -+ * last commit. Try that array first. -+ */ -+ err = find_dirtiest_idx_leb(c); -+ -+ /* Next, only if that found nothing, try scanning the entire LPT */ -+ if (err == -ENOSPC) -+ err = find_dirty_idx_leb(c); -+ -+ /* Finally, if still nothing, take any index LEBs awaiting trivial GC */ -+ if (err == -ENOSPC) -+ err = get_idx_gc_leb(c); -+ -+ ubifs_release_lprops(c); -+ return err; -+} -diff -Nurd linux-2.6.24.orig/fs/ubifs/gc.c linux-2.6.24/fs/ubifs/gc.c ---- linux-2.6.24.orig/fs/ubifs/gc.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.24/fs/ubifs/gc.c 2009-04-17 09:49:28.000000000 +0200 -@@ -0,0 +1,869 @@ -+/* -+ * This file is part of UBIFS. -+ * -+ * Copyright (C) 2006-2008 Nokia Corporation. -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 as published by -+ * the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -+ * more details. -+ * -+ * You should have received a copy of the GNU General Public License along with -+ * this program; if not, write to the Free Software Foundation, Inc., 51 -+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ * -+ * Authors: Adrian Hunter -+ * Artem Bityutskiy (Битюцкий Артём) -+ */ -+ -+/* -+ * This file implements garbage collection. The procedure for garbage collection -+ * is different depending on whether a LEB is an index LEB (contains index -+ * nodes) or not.
For non-index LEBs, garbage collection finds a LEB which -+ * contains a lot of dirty space (obsolete nodes), and copies the non-obsolete -+ * nodes to the journal, at which point the garbage-collected LEB is free to be -+ * reused. For index LEBs, garbage collection marks the non-obsolete index nodes -+ * dirty in the TNC, and after the next commit, the garbage-collected LEB is -+ * to be reused. Garbage collection will cause the number of dirty index nodes -+ * to grow, however sufficient space is reserved for the index to ensure the -+ * commit will never run out of space. -+ * -+ * Notes about dead watermark. At current UBIFS implementation we assume that -+ * LEBs which have less than @c->dead_wm bytes of free + dirty space are full -+ * and not worth garbage-collecting. The dead watermark is one min. I/O unit -+ * size, or min. UBIFS node size, depending on what is greater. Indeed, UBIFS -+ * Garbage Collector has to synchronize the GC head's write buffer before -+ * returning, so this is about wasting one min. I/O unit. However, UBIFS GC can -+ * actually reclaim even very small pieces of dirty space by garbage collecting -+ * enough dirty LEBs, but we do not bother doing this at this implementation. -+ * -+ * Notes about dark watermark. The results of GC work depends on how big are -+ * the UBIFS nodes GC deals with. Large nodes make GC waste more space. Indeed, -+ * if GC moves data from LEB A to LEB B and nodes in LEB A are large, GC would -+ * have to waste large pieces of free space at the end of LEB B, because nodes -+ * from LEB A would not fit. And the worst situation is when all nodes are of -+ * maximum size. So dark watermark is the amount of free + dirty space in LEB -+ * which are guaranteed to be reclaimable. If LEB has less space, the GC might -+ * be unable to reclaim it. So, LEBs with free + dirty greater than dark -+ * watermark are "good" LEBs from GC's point of view.
The other LEBs are not so -+ * good, and GC takes extra care when moving them. -+ */ -+ -+#include <linux/pagemap.h> -+#include "ubifs.h" -+ -+/* -+ * GC tries to optimize the way it fit nodes to available space, and it sorts -+ * nodes a little. The below constants are watermarks which define "large", -+ * "medium", and "small" nodes. -+ */ -+#define MEDIUM_NODE_WM (UBIFS_BLOCK_SIZE / 4) -+#define SMALL_NODE_WM UBIFS_MAX_DENT_NODE_SZ -+ -+/* -+ * GC may need to move more then one LEB to make progress. The below constants -+ * define "soft" and "hard" limits on the number of LEBs the garbage collector -+ * may move. -+ */ -+#define SOFT_LEBS_LIMIT 4 -+#define HARD_LEBS_LIMIT 32 -+ -+/** -+ * switch_gc_head - switch the garbage collection journal head. -+ * @c: UBIFS file-system description object -+ * @buf: buffer to write -+ * @len: length of the buffer to write -+ * @lnum: LEB number written is returned here -+ * @offs: offset written is returned here -+ * -+ * This function switch the GC head to the next LEB which is reserved in -+ * @c->gc_lnum. Returns %0 in case of success, %-EAGAIN if commit is required, -+ * and other negative error code in case of failures. -+ */ -+static int switch_gc_head(struct ubifs_info *c) -+{ -+ int err, gc_lnum = c->gc_lnum; -+ struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; -+ -+ ubifs_assert(gc_lnum != -1); -+ dbg_gc("switch GC head from LEB %d:%d to LEB %d (waste %d bytes)", -+ wbuf->lnum, wbuf->offs + wbuf->used, gc_lnum, -+ c->leb_size - wbuf->offs - wbuf->used); -+ -+ err = ubifs_wbuf_sync_nolock(wbuf); -+ if (err) -+ return err; -+ -+ /* -+ * The GC write-buffer was synchronized, we may safely unmap -+ * 'c->gc_lnum'. 
-+ */ -+ err = ubifs_leb_unmap(c, gc_lnum); -+ if (err) -+ return err; -+ -+ err = ubifs_add_bud_to_log(c, GCHD, gc_lnum, 0); -+ if (err) -+ return err; -+ -+ c->gc_lnum = -1; -+ err = ubifs_wbuf_seek_nolock(wbuf, gc_lnum, 0, UBI_LONGTERM); -+ return err; -+} -+ -+/** -+ * joinup - bring data nodes for an inode together. -+ * @c: UBIFS file-system description object -+ * @sleb: describes scanned LEB -+ * @inum: inode number -+ * @blk: block number -+ * @data: list to which to add data nodes -+ * -+ * This function looks at the first few nodes in the scanned LEB @sleb and adds -+ * them to @data if they are data nodes from @inum and have a larger block -+ * number than @blk. This function returns %0 on success and a negative error -+ * code on failure. -+ */ -+static int joinup(struct ubifs_info *c, struct ubifs_scan_leb *sleb, ino_t inum, -+ unsigned int blk, struct list_head *data) -+{ -+ int err, cnt = 6, lnum = sleb->lnum, offs; -+ struct ubifs_scan_node *snod, *tmp; -+ union ubifs_key *key; -+ -+ list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) { -+ key = &snod->key; -+ if (key_inum(c, key) == inum && -+ key_type(c, key) == UBIFS_DATA_KEY && -+ key_block(c, key) > blk) { -+ offs = snod->offs; -+ err = ubifs_tnc_has_node(c, key, 0, lnum, offs, 0); -+ if (err < 0) -+ return err; -+ list_del(&snod->list); -+ if (err) { -+ list_add_tail(&snod->list, data); -+ blk = key_block(c, key); -+ } else -+ kfree(snod); -+ cnt = 6; -+ } else if (--cnt == 0) -+ break; -+ } -+ return 0; -+} -+ -+/** -+ * move_nodes - move nodes. -+ * @c: UBIFS file-system description object -+ * @sleb: describes nodes to move -+ * -+ * This function moves valid nodes from data LEB described by @sleb to the GC -+ * journal head. The obsolete nodes are dropped. 
-+ * -+ * When moving nodes we have to deal with classical bin-packing problem: the -+ * space in the current GC journal head LEB and in @c->gc_lnum are the "bins", -+ * where the nodes in the @sleb->nodes list are the elements which should be -+ * fit optimally to the bins. This function uses the "first fit decreasing" -+ * strategy, although it does not really sort the nodes but just split them on -+ * 3 classes - large, medium, and small, so they are roughly sorted. -+ * -+ * This function returns zero in case of success, %-EAGAIN if commit is -+ * required, and other negative error codes in case of other failures. -+ */ -+static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb) -+{ -+ struct ubifs_scan_node *snod, *tmp; -+ struct list_head data, large, medium, small; -+ struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; -+ int avail, err, min = INT_MAX; -+ unsigned int blk = 0; -+ ino_t inum = 0; -+ -+ INIT_LIST_HEAD(&data); -+ INIT_LIST_HEAD(&large); -+ INIT_LIST_HEAD(&medium); -+ INIT_LIST_HEAD(&small); -+ -+ while (!list_empty(&sleb->nodes)) { -+ struct list_head *lst = sleb->nodes.next; -+ -+ snod = list_entry(lst, struct ubifs_scan_node, list); -+ -+ ubifs_assert(snod->type != UBIFS_IDX_NODE); -+ ubifs_assert(snod->type != UBIFS_REF_NODE); -+ ubifs_assert(snod->type != UBIFS_CS_NODE); -+ -+ err = ubifs_tnc_has_node(c, &snod->key, 0, sleb->lnum, -+ snod->offs, 0); -+ if (err < 0) -+ goto out; -+ -+ list_del(lst); -+ if (!err) { -+ /* The node is obsolete, remove it from the list */ -+ kfree(snod); -+ continue; -+ } -+ -+ /* -+ * Sort the list of nodes so that data nodes go first, large -+ * nodes go second, and small nodes go last. -+ */ -+ if (key_type(c, &snod->key) == UBIFS_DATA_KEY) { -+ if (inum != key_inum(c, &snod->key)) { -+ if (inum) { -+ /* -+ * Try to move data nodes from the same -+ * inode together. 
-+ */ -+ err = joinup(c, sleb, inum, blk, &data); -+ if (err) -+ goto out; -+ } -+ inum = key_inum(c, &snod->key); -+ blk = key_block(c, &snod->key); -+ } -+ list_add_tail(lst, &data); -+ } else if (snod->len > MEDIUM_NODE_WM) -+ list_add_tail(lst, &large); -+ else if (snod->len > SMALL_NODE_WM) -+ list_add_tail(lst, &medium); -+ else -+ list_add_tail(lst, &small); -+ -+ /* And find the smallest node */ -+ if (snod->len < min) -+ min = snod->len; -+ } -+ -+ /* -+ * Join the tree lists so that we'd have one roughly sorted list -+ * ('large' will be the head of the joined list). -+ */ -+ list_splice(&data, &large); -+ list_splice(&medium, large.prev); -+ list_splice(&small, large.prev); -+ -+ if (wbuf->lnum == -1) { -+ /* -+ * The GC journal head is not set, because it is the first GC -+ * invocation since mount. -+ */ -+ err = switch_gc_head(c); -+ if (err) -+ goto out; -+ } -+ -+ /* Write nodes to their new location. Use the first-fit strategy */ -+ while (1) { -+ avail = c->leb_size - wbuf->offs - wbuf->used; -+ list_for_each_entry_safe(snod, tmp, &large, list) { -+ int new_lnum, new_offs; -+ -+ if (avail < min) -+ break; -+ -+ if (snod->len > avail) -+ /* This node does not fit */ -+ continue; -+ -+ cond_resched(); -+ -+ new_lnum = wbuf->lnum; -+ new_offs = wbuf->offs + wbuf->used; -+ err = ubifs_wbuf_write_nolock(wbuf, snod->node, -+ snod->len); -+ if (err) -+ goto out; -+ err = ubifs_tnc_replace(c, &snod->key, sleb->lnum, -+ snod->offs, new_lnum, new_offs, -+ snod->len); -+ if (err) -+ goto out; -+ -+ avail = c->leb_size - wbuf->offs - wbuf->used; -+ list_del(&snod->list); -+ kfree(snod); -+ } -+ -+ if (list_empty(&large)) -+ break; -+ -+ /* -+ * Waste the rest of the space in the LEB and switch to the -+ * next LEB. 
-+ */ -+ err = switch_gc_head(c); -+ if (err) -+ goto out; -+ } -+ -+ return 0; -+ -+out: -+ list_for_each_entry_safe(snod, tmp, &large, list) { -+ list_del(&snod->list); -+ kfree(snod); -+ } -+ return err; -+} -+ -+/** -+ * gc_sync_wbufs - sync write-buffers for GC. -+ * @c: UBIFS file-system description object -+ * -+ * We must guarantee that obsoleting nodes are on flash. Unfortunately they may -+ * be in a write-buffer instead. That is, a node could be written to a -+ * write-buffer, obsoleting another node in a LEB that is GC'd. If that LEB is -+ * erased before the write-buffer is sync'd and then there is an unclean -+ * unmount, then an existing node is lost. To avoid this, we sync all -+ * write-buffers. -+ * -+ * This function returns %0 on success or a negative error code on failure. -+ */ -+static int gc_sync_wbufs(struct ubifs_info *c) -+{ -+ int err, i; -+ -+ for (i = 0; i < c->jhead_cnt; i++) { -+ if (i == GCHD) -+ continue; -+ err = ubifs_wbuf_sync(&c->jheads[i].wbuf); -+ if (err) -+ return err; -+ } -+ return 0; -+} -+ -+/** -+ * ubifs_garbage_collect_leb - garbage-collect a logical eraseblock. -+ * @c: UBIFS file-system description object -+ * @lp: describes the LEB to garbage collect -+ * -+ * This function garbage-collects an LEB and returns one of the @LEB_FREED, -+ * @LEB_RETAINED, etc positive codes in case of success, %-EAGAIN if commit is -+ * required, and other negative error codes in case of failures. -+ */ -+int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp) -+{ -+ struct ubifs_scan_leb *sleb; -+ struct ubifs_scan_node *snod; -+ struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; -+ int err = 0, lnum = lp->lnum; -+ -+ ubifs_assert(c->gc_lnum != -1 || wbuf->offs + wbuf->used == 0 || -+ c->need_recovery); -+ ubifs_assert(c->gc_lnum != lnum); -+ ubifs_assert(wbuf->lnum != lnum); -+ -+ /* -+ * We scan the entire LEB even though we only really need to scan up to -+ * (c->leb_size - lp->free). 
-+ */ -+ sleb = ubifs_scan(c, lnum, 0, c->sbuf); -+ if (IS_ERR(sleb)) -+ return PTR_ERR(sleb); -+ -+ ubifs_assert(!list_empty(&sleb->nodes)); -+ snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list); -+ -+ if (snod->type == UBIFS_IDX_NODE) { -+ struct ubifs_gced_idx_leb *idx_gc; -+ -+ dbg_gc("indexing LEB %d (free %d, dirty %d)", -+ lnum, lp->free, lp->dirty); -+ list_for_each_entry(snod, &sleb->nodes, list) { -+ struct ubifs_idx_node *idx = snod->node; -+ int level = le16_to_cpu(idx->level); -+ -+ ubifs_assert(snod->type == UBIFS_IDX_NODE); -+ key_read(c, ubifs_idx_key(c, idx), &snod->key); -+ err = ubifs_dirty_idx_node(c, &snod->key, level, lnum, -+ snod->offs); -+ if (err) -+ goto out; -+ } -+ -+ idx_gc = kmalloc(sizeof(struct ubifs_gced_idx_leb), GFP_NOFS); -+ if (!idx_gc) { -+ err = -ENOMEM; -+ goto out; -+ } -+ -+ idx_gc->lnum = lnum; -+ idx_gc->unmap = 0; -+ list_add(&idx_gc->list, &c->idx_gc); -+ -+ /* -+ * Don't release the LEB until after the next commit, because -+ * it may contain data which is needed for recovery. So -+ * although we freed this LEB, it will become usable only after -+ * the commit. 
-+ */ -+ err = ubifs_change_one_lp(c, lnum, c->leb_size, 0, 0, -+ LPROPS_INDEX, 1); -+ if (err) -+ goto out; -+ err = LEB_FREED_IDX; -+ } else { -+ dbg_gc("data LEB %d (free %d, dirty %d)", -+ lnum, lp->free, lp->dirty); -+ -+ err = move_nodes(c, sleb); -+ if (err) -+ goto out_inc_seq; -+ -+ err = gc_sync_wbufs(c); -+ if (err) -+ goto out_inc_seq; -+ -+ err = ubifs_change_one_lp(c, lnum, c->leb_size, 0, 0, 0, 0); -+ if (err) -+ goto out_inc_seq; -+ -+ /* Allow for races with TNC */ -+ c->gced_lnum = lnum; -+ smp_wmb(); -+ c->gc_seq += 1; -+ smp_wmb(); -+ -+ if (c->gc_lnum == -1) { -+ c->gc_lnum = lnum; -+ err = LEB_RETAINED; -+ } else { -+ err = ubifs_wbuf_sync_nolock(wbuf); -+ if (err) -+ goto out; -+ -+ err = ubifs_leb_unmap(c, lnum); -+ if (err) -+ goto out; -+ -+ err = LEB_FREED; -+ } -+ } -+ -+out: -+ ubifs_scan_destroy(sleb); -+ return err; -+ -+out_inc_seq: -+ /* We may have moved at least some nodes so allow for races with TNC */ -+ c->gced_lnum = lnum; -+ smp_wmb(); -+ c->gc_seq += 1; -+ smp_wmb(); -+ goto out; -+} -+ -+/** -+ * ubifs_garbage_collect - UBIFS garbage collector. -+ * @c: UBIFS file-system description object -+ * @anyway: do GC even if there are free LEBs -+ * -+ * This function does out-of-place garbage collection. The return codes are: -+ * o positive LEB number if the LEB has been freed and may be used; -+ * o %-EAGAIN if the caller has to run commit; -+ * o %-ENOSPC if GC failed to make any progress; -+ * o other negative error codes in case of other errors. -+ * -+ * Garbage collector writes data to the journal when GC'ing data LEBs, and just -+ * marking indexing nodes dirty when GC'ing indexing LEBs. Thus, at some point -+ * commit may be required. But commit cannot be run from inside GC, because the -+ * caller might be holding the commit lock, so %-EAGAIN is returned instead; -+ * And this error code means that the caller has to run commit, and re-run GC -+ * if there is still no free space. 
-+ * -+ * There are many reasons why this function may return %-EAGAIN: -+ * o the log is full and there is no space to write an LEB reference for -+ * @c->gc_lnum; -+ * o the journal is too large and exceeds size limitations; -+ * o GC moved indexing LEBs, but they can be used only after the commit; -+ * o the shrinker fails to find clean znodes to free and requests the commit; -+ * o etc. -+ * -+ * Note, if the file-system is close to be full, this function may return -+ * %-EAGAIN infinitely, so the caller has to limit amount of re-invocations of -+ * the function. E.g., this happens if the limits on the journal size are too -+ * tough and GC writes too much to the journal before an LEB is freed. This -+ * might also mean that the journal is too large, and the TNC becomes to big, -+ * so that the shrinker is constantly called, finds not clean znodes to free, -+ * and requests commit. Well, this may also happen if the journal is all right, -+ * but another kernel process consumes too much memory. Anyway, infinite -+ * %-EAGAIN may happen, but in some extreme/misconfiguration cases. -+ */ -+int ubifs_garbage_collect(struct ubifs_info *c, int anyway) -+{ -+ int i, err, ret, min_space = c->dead_wm; -+ struct ubifs_lprops lp; -+ struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; -+ -+ ubifs_assert_cmt_locked(c); -+ -+ if (ubifs_gc_should_commit(c)) -+ return -EAGAIN; -+ -+ mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); -+ -+ if (c->ro_media) { -+ ret = -EROFS; -+ goto out_unlock; -+ } -+ -+ /* We expect the write-buffer to be empty on entry */ -+ ubifs_assert(!wbuf->used); -+ -+ for (i = 0; ; i++) { -+ int space_before = c->leb_size - wbuf->offs - wbuf->used; -+ int space_after; -+ -+ cond_resched(); -+ -+ /* Give the commit an opportunity to run */ -+ if (ubifs_gc_should_commit(c)) { -+ ret = -EAGAIN; -+ break; -+ } -+ -+ if (i > SOFT_LEBS_LIMIT && !list_empty(&c->idx_gc)) { -+ /* -+ * We've done enough iterations. 
Indexing LEBs were -+ * moved and will be available after the commit. -+ */ -+ dbg_gc("soft limit, some index LEBs GC'ed, -EAGAIN"); -+ ubifs_commit_required(c); -+ ret = -EAGAIN; -+ break; -+ } -+ -+ if (i > HARD_LEBS_LIMIT) { -+ /* -+ * We've moved too many LEBs and have not made -+ * progress, give up. -+ */ -+ dbg_gc("hard limit, -ENOSPC"); -+ ret = -ENOSPC; -+ break; -+ } -+ -+ /* -+ * Empty and freeable LEBs can turn up while we waited for -+ * the wbuf lock, or while we have been running GC. In that -+ * case, we should just return one of those instead of -+ * continuing to GC dirty LEBs. Hence we request -+ * 'ubifs_find_dirty_leb()' to return an empty LEB if it can. -+ */ -+ ret = ubifs_find_dirty_leb(c, &lp, min_space, anyway ? 0 : 1); -+ if (ret) { -+ if (ret == -ENOSPC) -+ dbg_gc("no more dirty LEBs"); -+ break; -+ } -+ -+ dbg_gc("found LEB %d: free %d, dirty %d, sum %d " -+ "(min. space %d)", lp.lnum, lp.free, lp.dirty, -+ lp.free + lp.dirty, min_space); -+ -+ if (lp.free + lp.dirty == c->leb_size) { -+ /* An empty LEB was returned */ -+ dbg_gc("LEB %d is free, return it", lp.lnum); -+ /* -+ * ubifs_find_dirty_leb() doesn't return freeable index -+ * LEBs. -+ */ -+ ubifs_assert(!(lp.flags & LPROPS_INDEX)); -+ if (lp.free != c->leb_size) { -+ /* -+ * Write buffers must be sync'd before -+ * unmapping freeable LEBs, because one of them -+ * may contain data which obsoletes something -+ * in 'lp.pnum'. -+ */ -+ ret = gc_sync_wbufs(c); -+ if (ret) -+ goto out; -+ ret = ubifs_change_one_lp(c, lp.lnum, -+ c->leb_size, 0, 0, 0, -+ 0); -+ if (ret) -+ goto out; -+ } -+ ret = ubifs_leb_unmap(c, lp.lnum); -+ if (ret) -+ goto out; -+ ret = lp.lnum; -+ break; -+ } -+ -+ space_before = c->leb_size - wbuf->offs - wbuf->used; -+ if (wbuf->lnum == -1) -+ space_before = 0; -+ -+ ret = ubifs_garbage_collect_leb(c, &lp); -+ if (ret < 0) { -+ if (ret == -EAGAIN || ret == -ENOSPC) { -+ /* -+ * These codes are not errors, so we have to -+ * return the LEB to lprops. 
But if the -+ * 'ubifs_return_leb()' function fails, its -+ * failure code is propagated to the caller -+ * instead of the original '-EAGAIN' or -+ * '-ENOSPC'. -+ */ -+ err = ubifs_return_leb(c, lp.lnum); -+ if (err) -+ ret = err; -+ break; -+ } -+ goto out; -+ } -+ -+ if (ret == LEB_FREED) { -+ /* An LEB has been freed and is ready for use */ -+ dbg_gc("LEB %d freed, return", lp.lnum); -+ ret = lp.lnum; -+ break; -+ } -+ -+ if (ret == LEB_FREED_IDX) { -+ /* -+ * This was an indexing LEB and it cannot be -+ * immediately used. And instead of requesting the -+ * commit straight away, we try to garbage collect some -+ * more. -+ */ -+ dbg_gc("indexing LEB %d freed, continue", lp.lnum); -+ continue; -+ } -+ -+ ubifs_assert(ret == LEB_RETAINED); -+ space_after = c->leb_size - wbuf->offs - wbuf->used; -+ dbg_gc("LEB %d retained, freed %d bytes", lp.lnum, -+ space_after - space_before); -+ -+ if (space_after > space_before) { -+ /* GC makes progress, keep working */ -+ min_space >>= 1; -+ if (min_space < c->dead_wm) -+ min_space = c->dead_wm; -+ continue; -+ } -+ -+ dbg_gc("did not make progress"); -+ -+ /* -+ * GC moved an LEB bud have not done any progress. This means -+ * that the previous GC head LEB contained too few free space -+ * and the LEB which was GC'ed contained only large nodes which -+ * did not fit that space. -+ * -+ * We can do 2 things: -+ * 1. pick another LEB in a hope it'll contain a small node -+ * which will fit the space we have at the end of current GC -+ * head LEB, but there is no guarantee, so we try this out -+ * unless we have already been working for too long; -+ * 2. request an LEB with more dirty space, which will force -+ * 'ubifs_find_dirty_leb()' to start scanning the lprops -+ * table, instead of just picking one from the heap -+ * (previously it already picked the dirtiest LEB). 
-+ */ -+ if (i < SOFT_LEBS_LIMIT) { -+ dbg_gc("try again"); -+ continue; -+ } -+ -+ min_space <<= 1; -+ if (min_space > c->dark_wm) -+ min_space = c->dark_wm; -+ dbg_gc("set min. space to %d", min_space); -+ } -+ -+ if (ret == -ENOSPC && !list_empty(&c->idx_gc)) { -+ dbg_gc("no space, some index LEBs GC'ed, -EAGAIN"); -+ ubifs_commit_required(c); -+ ret = -EAGAIN; -+ } -+ -+ err = ubifs_wbuf_sync_nolock(wbuf); -+ if (!err) -+ err = ubifs_leb_unmap(c, c->gc_lnum); -+ if (err) { -+ ret = err; -+ goto out; -+ } -+out_unlock: -+ mutex_unlock(&wbuf->io_mutex); -+ return ret; -+ -+out: -+ ubifs_assert(ret < 0); -+ ubifs_assert(ret != -ENOSPC && ret != -EAGAIN); -+ ubifs_ro_mode(c, ret); -+ ubifs_wbuf_sync_nolock(wbuf); -+ mutex_unlock(&wbuf->io_mutex); -+ ubifs_return_leb(c, lp.lnum); -+ return ret; -+} -+ -+/** -+ * ubifs_gc_start_commit - garbage collection at start of commit. -+ * @c: UBIFS file-system description object -+ * -+ * If a LEB has only dirty and free space, then we may safely unmap it and make -+ * it free. Note, we cannot do this with indexing LEBs because dirty space may -+ * correspond index nodes that are required for recovery. In that case, the -+ * LEB cannot be unmapped until after the next commit. -+ * -+ * This function returns %0 upon success and a negative error code upon failure. -+ */ -+int ubifs_gc_start_commit(struct ubifs_info *c) -+{ -+ struct ubifs_gced_idx_leb *idx_gc; -+ const struct ubifs_lprops *lp; -+ int err = 0, flags; -+ -+ ubifs_get_lprops(c); -+ -+ /* -+ * Unmap (non-index) freeable LEBs. Note that recovery requires that all -+ * wbufs are sync'd before this, which is done in 'do_commit()'. 
-+ */ -+ while (1) { -+ lp = ubifs_fast_find_freeable(c); -+ if (IS_ERR(lp)) { -+ err = PTR_ERR(lp); -+ goto out; -+ } -+ if (!lp) -+ break; -+ ubifs_assert(!(lp->flags & LPROPS_TAKEN)); -+ ubifs_assert(!(lp->flags & LPROPS_INDEX)); -+ err = ubifs_leb_unmap(c, lp->lnum); -+ if (err) -+ goto out; -+ lp = ubifs_change_lp(c, lp, c->leb_size, 0, lp->flags, 0); -+ if (IS_ERR(lp)) { -+ err = PTR_ERR(lp); -+ goto out; -+ } -+ ubifs_assert(!(lp->flags & LPROPS_TAKEN)); -+ ubifs_assert(!(lp->flags & LPROPS_INDEX)); -+ } -+ -+ /* Mark GC'd index LEBs OK to unmap after this commit finishes */ -+ list_for_each_entry(idx_gc, &c->idx_gc, list) -+ idx_gc->unmap = 1; -+ -+ /* Record index freeable LEBs for unmapping after commit */ -+ while (1) { -+ lp = ubifs_fast_find_frdi_idx(c); -+ if (IS_ERR(lp)) { -+ err = PTR_ERR(lp); -+ goto out; -+ } -+ if (!lp) -+ break; -+ idx_gc = kmalloc(sizeof(struct ubifs_gced_idx_leb), GFP_NOFS); -+ if (!idx_gc) { -+ err = -ENOMEM; -+ goto out; -+ } -+ ubifs_assert(!(lp->flags & LPROPS_TAKEN)); -+ ubifs_assert(lp->flags & LPROPS_INDEX); -+ /* Don't release the LEB until after the next commit */ -+ flags = (lp->flags | LPROPS_TAKEN) ^ LPROPS_INDEX; -+ lp = ubifs_change_lp(c, lp, c->leb_size, 0, flags, 1); -+ if (IS_ERR(lp)) { -+ err = PTR_ERR(lp); -+ kfree(idx_gc); -+ goto out; -+ } -+ ubifs_assert(lp->flags & LPROPS_TAKEN); -+ ubifs_assert(!(lp->flags & LPROPS_INDEX)); -+ idx_gc->lnum = lp->lnum; -+ idx_gc->unmap = 1; -+ list_add(&idx_gc->list, &c->idx_gc); -+ } -+out: -+ ubifs_release_lprops(c); -+ return err; -+} -+ -+/** -+ * ubifs_gc_end_commit - garbage collection at end of commit. -+ * @c: UBIFS file-system description object -+ * -+ * This function completes out-of-place garbage collection of index LEBs. 
-+ */ -+int ubifs_gc_end_commit(struct ubifs_info *c) -+{ -+ struct ubifs_gced_idx_leb *idx_gc, *tmp; -+ struct ubifs_wbuf *wbuf; -+ int err = 0; -+ -+ wbuf = &c->jheads[GCHD].wbuf; -+ mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); -+ list_for_each_entry_safe(idx_gc, tmp, &c->idx_gc, list) -+ if (idx_gc->unmap) { -+ dbg_gc("LEB %d", idx_gc->lnum); -+ err = ubifs_leb_unmap(c, idx_gc->lnum); -+ if (err) -+ goto out; -+ err = ubifs_change_one_lp(c, idx_gc->lnum, LPROPS_NC, -+ LPROPS_NC, 0, LPROPS_TAKEN, -1); -+ if (err) -+ goto out; -+ list_del(&idx_gc->list); -+ kfree(idx_gc); -+ } -+out: -+ mutex_unlock(&wbuf->io_mutex); -+ return err; -+} -+ -+/** -+ * ubifs_destroy_idx_gc - destroy idx_gc list. -+ * @c: UBIFS file-system description object -+ * -+ * This function destroys the @c->idx_gc list. It is called when unmounting -+ * so locks are not needed. Returns zero in case of success and a negative -+ * error code in case of failure. -+ */ -+void ubifs_destroy_idx_gc(struct ubifs_info *c) -+{ -+ while (!list_empty(&c->idx_gc)) { -+ struct ubifs_gced_idx_leb *idx_gc; -+ -+ idx_gc = list_entry(c->idx_gc.next, struct ubifs_gced_idx_leb, -+ list); -+ c->idx_gc_cnt -= 1; -+ list_del(&idx_gc->list); -+ kfree(idx_gc); -+ } -+} -+ -+/** -+ * ubifs_get_idx_gc_leb - get a LEB from GC'd index LEB list. -+ * @c: UBIFS file-system description object -+ * -+ * Called during start commit so locks are not needed. 
-+ */ -+int ubifs_get_idx_gc_leb(struct ubifs_info *c) -+{ -+ struct ubifs_gced_idx_leb *idx_gc; -+ int lnum; -+ -+ if (list_empty(&c->idx_gc)) -+ return -ENOSPC; -+ idx_gc = list_entry(c->idx_gc.next, struct ubifs_gced_idx_leb, list); -+ lnum = idx_gc->lnum; -+ /* c->idx_gc_cnt is updated by the caller when lprops are updated */ -+ list_del(&idx_gc->list); -+ kfree(idx_gc); -+ return lnum; -+} -diff -Nurd linux-2.6.24.orig/fs/ubifs/io.c linux-2.6.24/fs/ubifs/io.c ---- linux-2.6.24.orig/fs/ubifs/io.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.24/fs/ubifs/io.c 2009-04-17 09:49:28.000000000 +0200 -@@ -0,0 +1,940 @@ -+/* -+ * This file is part of UBIFS. -+ * -+ * Copyright (C) 2006-2008 Nokia Corporation. -+ * Copyright (C) 2006, 2007 University of Szeged, Hungary -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 as published by -+ * the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -+ * more details. -+ * -+ * You should have received a copy of the GNU General Public License along with -+ * this program; if not, write to the Free Software Foundation, Inc., 51 -+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ * -+ * Authors: Artem Bityutskiy (Битюцкий Артём) -+ * Adrian Hunter -+ * Zoltan Sogor -+ */ -+ -+/* -+ * This file implements UBIFS I/O subsystem which provides various I/O-related -+ * helper functions (reading/writing/checking/validating nodes) and implements -+ * write-buffering support. Write buffers help to save space which otherwise -+ * would have been wasted for padding to the nearest minimal I/O unit boundary. 
-+ * Instead, data first goes to the write-buffer and is flushed when the -+ * buffer is full or when it is not used for some time (by timer). This is -+ * similar to the mechanism is used by JFFS2. -+ * -+ * Write-buffers are defined by 'struct ubifs_wbuf' objects and protected by -+ * mutexes defined inside these objects. Since sometimes upper-level code -+ * has to lock the write-buffer (e.g. journal space reservation code), many -+ * functions related to write-buffers have "nolock" suffix which means that the -+ * caller has to lock the write-buffer before calling this function. -+ * -+ * UBIFS stores nodes at 64 bit-aligned addresses. If the node length is not -+ * aligned, UBIFS starts the next node from the aligned address, and the padded -+ * bytes may contain any rubbish. In other words, UBIFS does not put padding -+ * bytes in those small gaps. Common headers of nodes store real node lengths, -+ * not aligned lengths. Indexing nodes also store real lengths in branches. -+ * -+ * UBIFS uses padding when it pads to the next min. I/O unit. In this case it -+ * uses padding nodes or padding bytes, if the padding node does not fit. -+ * -+ * All UBIFS nodes are protected by CRC checksums and UBIFS checks all nodes -+ * every time they are read from the flash media. -+ */ -+ -+#include <linux/crc32.h> -+#include "ubifs.h" -+ -+/** -+ * ubifs_ro_mode - switch UBIFS to read read-only mode. -+ * @c: UBIFS file-system description object -+ * @err: error code which is the reason of switching to R/O mode -+ */ -+void ubifs_ro_mode(struct ubifs_info *c, int err) -+{ -+ if (!c->ro_media) { -+ c->ro_media = 1; -+ c->no_chk_data_crc = 0; -+ ubifs_warn("switched to read-only mode, error %d", err); -+ dbg_dump_stack(); -+ } -+} -+ -+/** -+ * ubifs_check_node - check node. 
-+ * @c: UBIFS file-system description object -+ * @buf: node to check -+ * @lnum: logical eraseblock number -+ * @offs: offset within the logical eraseblock -+ * @quiet: print no messages -+ * @must_chk_crc: indicates whether to always check the CRC -+ * -+ * This function checks node magic number and CRC checksum. This function also -+ * validates node length to prevent UBIFS from becoming crazy when an attacker -+ * feeds it a file-system image with incorrect nodes. For example, too large -+ * node length in the common header could cause UBIFS to read memory outside of -+ * allocated buffer when checking the CRC checksum. -+ * -+ * This function may skip data nodes CRC checking if @c->no_chk_data_crc is -+ * true, which is controlled by corresponding UBIFS mount option. However, if -+ * @must_chk_crc is true, then @c->no_chk_data_crc is ignored and CRC is -+ * checked. Similarly, if @c->always_chk_crc is true, @c->no_chk_data_crc is -+ * ignored and CRC is checked. -+ * -+ * This function returns zero in case of success and %-EUCLEAN in case of bad -+ * CRC or magic. 
-+ */ -+int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, -+ int offs, int quiet, int must_chk_crc) -+{ -+ int err = -EINVAL, type, node_len; -+ uint32_t crc, node_crc, magic; -+ const struct ubifs_ch *ch = buf; -+ -+ ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0); -+ ubifs_assert(!(offs & 7) && offs < c->leb_size); -+ -+ magic = le32_to_cpu(ch->magic); -+ if (magic != UBIFS_NODE_MAGIC) { -+ if (!quiet) -+ ubifs_err("bad magic %#08x, expected %#08x", -+ magic, UBIFS_NODE_MAGIC); -+ err = -EUCLEAN; -+ goto out; -+ } -+ -+ type = ch->node_type; -+ if (type < 0 || type >= UBIFS_NODE_TYPES_CNT) { -+ if (!quiet) -+ ubifs_err("bad node type %d", type); -+ goto out; -+ } -+ -+ node_len = le32_to_cpu(ch->len); -+ if (node_len + offs > c->leb_size) -+ goto out_len; -+ -+ if (c->ranges[type].max_len == 0) { -+ if (node_len != c->ranges[type].len) -+ goto out_len; -+ } else if (node_len < c->ranges[type].min_len || -+ node_len > c->ranges[type].max_len) -+ goto out_len; -+ -+ if (!must_chk_crc && type == UBIFS_DATA_NODE && !c->always_chk_crc && -+ c->no_chk_data_crc) -+ return 0; -+ -+ crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); -+ node_crc = le32_to_cpu(ch->crc); -+ if (crc != node_crc) { -+ if (!quiet) -+ ubifs_err("bad CRC: calculated %#08x, read %#08x", -+ crc, node_crc); -+ err = -EUCLEAN; -+ goto out; -+ } -+ -+ return 0; -+ -+out_len: -+ if (!quiet) -+ ubifs_err("bad node length %d", node_len); -+out: -+ if (!quiet) { -+ ubifs_err("bad node at LEB %d:%d", lnum, offs); -+ dbg_dump_node(c, buf); -+ dbg_dump_stack(); -+ } -+ return err; -+} -+ -+/** -+ * ubifs_pad - pad flash space. -+ * @c: UBIFS file-system description object -+ * @buf: buffer to put padding to -+ * @pad: how many bytes to pad -+ * -+ * The flash media obliges us to write only in chunks of %c->min_io_size and -+ * when we have to write less data we add padding node to the write-buffer and -+ * pad it to the next minimal I/O unit's boundary. 
Padding nodes help when the -+ * media is being scanned. If the amount of wasted space is not enough to fit a -+ * padding node which takes %UBIFS_PAD_NODE_SZ bytes, we write padding bytes -+ * pattern (%UBIFS_PADDING_BYTE). -+ * -+ * Padding nodes are also used to fill gaps when the "commit-in-gaps" method is -+ * used. -+ */ -+void ubifs_pad(const struct ubifs_info *c, void *buf, int pad) -+{ -+ uint32_t crc; -+ -+ ubifs_assert(pad >= 0 && !(pad & 7)); -+ -+ if (pad >= UBIFS_PAD_NODE_SZ) { -+ struct ubifs_ch *ch = buf; -+ struct ubifs_pad_node *pad_node = buf; -+ -+ ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC); -+ ch->node_type = UBIFS_PAD_NODE; -+ ch->group_type = UBIFS_NO_NODE_GROUP; -+ ch->padding[0] = ch->padding[1] = 0; -+ ch->sqnum = 0; -+ ch->len = cpu_to_le32(UBIFS_PAD_NODE_SZ); -+ pad -= UBIFS_PAD_NODE_SZ; -+ pad_node->pad_len = cpu_to_le32(pad); -+ crc = crc32(UBIFS_CRC32_INIT, buf + 8, UBIFS_PAD_NODE_SZ - 8); -+ ch->crc = cpu_to_le32(crc); -+ memset(buf + UBIFS_PAD_NODE_SZ, 0, pad); -+ } else if (pad > 0) -+ /* Too little space, padding node won't fit */ -+ memset(buf, UBIFS_PADDING_BYTE, pad); -+} -+ -+/** -+ * next_sqnum - get next sequence number. -+ * @c: UBIFS file-system description object -+ */ -+static unsigned long long next_sqnum(struct ubifs_info *c) -+{ -+ unsigned long long sqnum; -+ -+ spin_lock(&c->cnt_lock); -+ sqnum = ++c->max_sqnum; -+ spin_unlock(&c->cnt_lock); -+ -+ if (unlikely(sqnum >= SQNUM_WARN_WATERMARK)) { -+ if (sqnum >= SQNUM_WATERMARK) { -+ ubifs_err("sequence number overflow %llu, end of life", -+ sqnum); -+ ubifs_ro_mode(c, -EINVAL); -+ } -+ ubifs_warn("running out of sequence numbers, end of life soon"); -+ } -+ -+ return sqnum; -+} -+ -+/** -+ * ubifs_prepare_node - prepare node to be written to flash. 
-+ * @c: UBIFS file-system description object -+ * @node: the node to pad -+ * @len: node length -+ * @pad: if the buffer has to be padded -+ * -+ * This function prepares node at @node to be written to the media - it -+ * calculates node CRC, fills the common header, and adds proper padding up to -+ * the next minimum I/O unit if @pad is not zero. -+ */ -+void ubifs_prepare_node(struct ubifs_info *c, void *node, int len, int pad) -+{ -+ uint32_t crc; -+ struct ubifs_ch *ch = node; -+ unsigned long long sqnum = next_sqnum(c); -+ -+ ubifs_assert(len >= UBIFS_CH_SZ); -+ -+ ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC); -+ ch->len = cpu_to_le32(len); -+ ch->group_type = UBIFS_NO_NODE_GROUP; -+ ch->sqnum = cpu_to_le64(sqnum); -+ ch->padding[0] = ch->padding[1] = 0; -+ crc = crc32(UBIFS_CRC32_INIT, node + 8, len - 8); -+ ch->crc = cpu_to_le32(crc); -+ -+ if (pad) { -+ len = ALIGN(len, 8); -+ pad = ALIGN(len, c->min_io_size) - len; -+ ubifs_pad(c, node + len, pad); -+ } -+} -+ -+/** -+ * ubifs_prep_grp_node - prepare node of a group to be written to flash. -+ * @c: UBIFS file-system description object -+ * @node: the node to pad -+ * @len: node length -+ * @last: indicates the last node of the group -+ * -+ * This function prepares node at @node to be written to the media - it -+ * calculates node CRC and fills the common header. -+ */ -+void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last) -+{ -+ uint32_t crc; -+ struct ubifs_ch *ch = node; -+ unsigned long long sqnum = next_sqnum(c); -+ -+ ubifs_assert(len >= UBIFS_CH_SZ); -+ -+ ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC); -+ ch->len = cpu_to_le32(len); -+ if (last) -+ ch->group_type = UBIFS_LAST_OF_NODE_GROUP; -+ else -+ ch->group_type = UBIFS_IN_NODE_GROUP; -+ ch->sqnum = cpu_to_le64(sqnum); -+ ch->padding[0] = ch->padding[1] = 0; -+ crc = crc32(UBIFS_CRC32_INIT, node + 8, len - 8); -+ ch->crc = cpu_to_le32(crc); -+} -+ -+/** -+ * wbuf_timer_callback - write-buffer timer callback function. 
-+ * @data: timer data (write-buffer descriptor) -+ * -+ * This function is called when the write-buffer timer expires. -+ */ -+static void wbuf_timer_callback_nolock(unsigned long data) -+{ -+ struct ubifs_wbuf *wbuf = (struct ubifs_wbuf *)data; -+ -+ wbuf->need_sync = 1; -+ wbuf->c->need_wbuf_sync = 1; -+ ubifs_wake_up_bgt(wbuf->c); -+} -+ -+/** -+ * new_wbuf_timer - start new write-buffer timer. -+ * @wbuf: write-buffer descriptor -+ */ -+static void new_wbuf_timer_nolock(struct ubifs_wbuf *wbuf) -+{ -+ ubifs_assert(!timer_pending(&wbuf->timer)); -+ -+ if (!wbuf->timeout) -+ return; -+ -+ wbuf->timer.expires = jiffies + wbuf->timeout; -+ add_timer(&wbuf->timer); -+} -+ -+/** -+ * cancel_wbuf_timer - cancel write-buffer timer. -+ * @wbuf: write-buffer descriptor -+ */ -+static void cancel_wbuf_timer_nolock(struct ubifs_wbuf *wbuf) -+{ -+ /* -+ * If the syncer is waiting for the lock (from the background thread's -+ * context) and another task is changing write-buffer then the syncing -+ * should be canceled. -+ */ -+ wbuf->need_sync = 0; -+ del_timer(&wbuf->timer); -+} -+ -+/** -+ * ubifs_wbuf_sync_nolock - synchronize write-buffer. -+ * @wbuf: write-buffer to synchronize -+ * -+ * This function synchronizes write-buffer @buf and returns zero in case of -+ * success or a negative error code in case of failure. 
-+ */ -+int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf) -+{ -+ struct ubifs_info *c = wbuf->c; -+ int err, dirt; -+ -+ cancel_wbuf_timer_nolock(wbuf); -+ if (!wbuf->used || wbuf->lnum == -1) -+ /* Write-buffer is empty or not seeked */ -+ return 0; -+ -+ dbg_io("LEB %d:%d, %d bytes", -+ wbuf->lnum, wbuf->offs, wbuf->used); -+ ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY)); -+ ubifs_assert(!(wbuf->avail & 7)); -+ ubifs_assert(wbuf->offs + c->min_io_size <= c->leb_size); -+ -+ if (c->ro_media) -+ return -EROFS; -+ -+ ubifs_pad(c, wbuf->buf + wbuf->used, wbuf->avail); -+ err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, -+ c->min_io_size, wbuf->dtype); -+ if (err) { -+ ubifs_err("cannot write %d bytes to LEB %d:%d", -+ c->min_io_size, wbuf->lnum, wbuf->offs); -+ dbg_dump_stack(); -+ return err; -+ } -+ -+ dirt = wbuf->avail; -+ -+ spin_lock(&wbuf->lock); -+ wbuf->offs += c->min_io_size; -+ wbuf->avail = c->min_io_size; -+ wbuf->used = 0; -+ wbuf->next_ino = 0; -+ spin_unlock(&wbuf->lock); -+ -+ if (wbuf->sync_callback) -+ err = wbuf->sync_callback(c, wbuf->lnum, -+ c->leb_size - wbuf->offs, dirt); -+ return err; -+} -+ -+/** -+ * ubifs_wbuf_seek_nolock - seek write-buffer. -+ * @wbuf: write-buffer -+ * @lnum: logical eraseblock number to seek to -+ * @offs: logical eraseblock offset to seek to -+ * @dtype: data type -+ * -+ * This function targets the write buffer to logical eraseblock @lnum:@offs. -+ * The write-buffer is synchronized if it is not empty. Returns zero in case of -+ * success and a negative error code in case of failure. 
-+ */ -+int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, -+ int dtype) -+{ -+ const struct ubifs_info *c = wbuf->c; -+ -+ dbg_io("LEB %d:%d", lnum, offs); -+ ubifs_assert(lnum >= 0 && lnum < c->leb_cnt); -+ ubifs_assert(offs >= 0 && offs <= c->leb_size); -+ ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7)); -+ ubifs_assert(lnum != wbuf->lnum); -+ -+ if (wbuf->used > 0) { -+ int err = ubifs_wbuf_sync_nolock(wbuf); -+ -+ if (err) -+ return err; -+ } -+ -+ spin_lock(&wbuf->lock); -+ wbuf->lnum = lnum; -+ wbuf->offs = offs; -+ wbuf->avail = c->min_io_size; -+ wbuf->used = 0; -+ spin_unlock(&wbuf->lock); -+ wbuf->dtype = dtype; -+ -+ return 0; -+} -+ -+/** -+ * ubifs_bg_wbufs_sync - synchronize write-buffers. -+ * @c: UBIFS file-system description object -+ * -+ * This function is called by background thread to synchronize write-buffers. -+ * Returns zero in case of success and a negative error code in case of -+ * failure. -+ */ -+int ubifs_bg_wbufs_sync(struct ubifs_info *c) -+{ -+ int err, i; -+ -+ if (!c->need_wbuf_sync) -+ return 0; -+ c->need_wbuf_sync = 0; -+ -+ if (c->ro_media) { -+ err = -EROFS; -+ goto out_timers; -+ } -+ -+ dbg_io("synchronize"); -+ for (i = 0; i < c->jhead_cnt; i++) { -+ struct ubifs_wbuf *wbuf = &c->jheads[i].wbuf; -+ -+ cond_resched(); -+ -+ /* -+ * If the mutex is locked then wbuf is being changed, so -+ * synchronization is not necessary. 
-+ */ -+ if (mutex_is_locked(&wbuf->io_mutex)) -+ continue; -+ -+ mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); -+ if (!wbuf->need_sync) { -+ mutex_unlock(&wbuf->io_mutex); -+ continue; -+ } -+ -+ err = ubifs_wbuf_sync_nolock(wbuf); -+ mutex_unlock(&wbuf->io_mutex); -+ if (err) { -+ ubifs_err("cannot sync write-buffer, error %d", err); -+ ubifs_ro_mode(c, err); -+ goto out_timers; -+ } -+ } -+ -+ return 0; -+ -+out_timers: -+ /* Cancel all timers to prevent repeated errors */ -+ for (i = 0; i < c->jhead_cnt; i++) { -+ struct ubifs_wbuf *wbuf = &c->jheads[i].wbuf; -+ -+ mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); -+ cancel_wbuf_timer_nolock(wbuf); -+ mutex_unlock(&wbuf->io_mutex); -+ } -+ return err; -+} -+ -+/** -+ * ubifs_wbuf_write_nolock - write data to flash via write-buffer. -+ * @wbuf: write-buffer -+ * @buf: node to write -+ * @len: node length -+ * -+ * This function writes data to flash via write-buffer @wbuf. This means that -+ * the last piece of the node won't reach the flash media immediately if it -+ * does not take whole minimal I/O unit. Instead, the node will sit in RAM -+ * until the write-buffer is synchronized (e.g., by timer). -+ * -+ * This function returns zero in case of success and a negative error code in -+ * case of failure. If the node cannot be written because there is no more -+ * space in this logical eraseblock, %-ENOSPC is returned. 
-+ */ -+int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) -+{ -+ struct ubifs_info *c = wbuf->c; -+ int err, written, n, aligned_len = ALIGN(len, 8), offs; -+ -+ dbg_io("%d bytes (%s) to wbuf at LEB %d:%d", len, -+ dbg_ntype(((struct ubifs_ch *)buf)->node_type), wbuf->lnum, -+ wbuf->offs + wbuf->used); -+ ubifs_assert(len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt); -+ ubifs_assert(wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0); -+ ubifs_assert(!(wbuf->offs & 7) && wbuf->offs <= c->leb_size); -+ ubifs_assert(wbuf->avail > 0 && wbuf->avail <= c->min_io_size); -+ ubifs_assert(mutex_is_locked(&wbuf->io_mutex)); -+ -+ if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) { -+ err = -ENOSPC; -+ goto out; -+ } -+ -+ cancel_wbuf_timer_nolock(wbuf); -+ -+ if (c->ro_media) -+ return -EROFS; -+ -+ if (aligned_len <= wbuf->avail) { -+ /* -+ * The node is not very large and fits entirely within -+ * write-buffer. -+ */ -+ memcpy(wbuf->buf + wbuf->used, buf, len); -+ -+ if (aligned_len == wbuf->avail) { -+ dbg_io("flush wbuf to LEB %d:%d", wbuf->lnum, -+ wbuf->offs); -+ err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, -+ wbuf->offs, c->min_io_size, -+ wbuf->dtype); -+ if (err) -+ goto out; -+ -+ spin_lock(&wbuf->lock); -+ wbuf->offs += c->min_io_size; -+ wbuf->avail = c->min_io_size; -+ wbuf->used = 0; -+ wbuf->next_ino = 0; -+ spin_unlock(&wbuf->lock); -+ } else { -+ spin_lock(&wbuf->lock); -+ wbuf->avail -= aligned_len; -+ wbuf->used += aligned_len; -+ spin_unlock(&wbuf->lock); -+ } -+ -+ goto exit; -+ } -+ -+ /* -+ * The node is large enough and does not fit entirely within current -+ * minimal I/O unit. We have to fill and flush write-buffer and switch -+ * to the next min. I/O unit. 
-+ */ -+ dbg_io("flush wbuf to LEB %d:%d", wbuf->lnum, wbuf->offs); -+ memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail); -+ err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, -+ c->min_io_size, wbuf->dtype); -+ if (err) -+ goto out; -+ -+ offs = wbuf->offs + c->min_io_size; -+ len -= wbuf->avail; -+ aligned_len -= wbuf->avail; -+ written = wbuf->avail; -+ -+ /* -+ * The remaining data may take more whole min. I/O units, so write the -+ * remains multiple to min. I/O unit size directly to the flash media. -+ * We align node length to 8-byte boundary because we anyway flash wbuf -+ * if the remaining space is less than 8 bytes. -+ */ -+ n = aligned_len >> c->min_io_shift; -+ if (n) { -+ n <<= c->min_io_shift; -+ dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, offs); -+ err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, offs, n, -+ wbuf->dtype); -+ if (err) -+ goto out; -+ offs += n; -+ aligned_len -= n; -+ len -= n; -+ written += n; -+ } -+ -+ spin_lock(&wbuf->lock); -+ if (aligned_len) -+ /* -+ * And now we have what's left and what does not take whole -+ * min. I/O unit, so write it to the write-buffer and we are -+ * done. -+ */ -+ memcpy(wbuf->buf, buf + written, len); -+ -+ wbuf->offs = offs; -+ wbuf->used = aligned_len; -+ wbuf->avail = c->min_io_size - aligned_len; -+ wbuf->next_ino = 0; -+ spin_unlock(&wbuf->lock); -+ -+exit: -+ if (wbuf->sync_callback) { -+ int free = c->leb_size - wbuf->offs - wbuf->used; -+ -+ err = wbuf->sync_callback(c, wbuf->lnum, free, 0); -+ if (err) -+ goto out; -+ } -+ -+ if (wbuf->used) -+ new_wbuf_timer_nolock(wbuf); -+ -+ return 0; -+ -+out: -+ ubifs_err("cannot write %d bytes to LEB %d:%d, error %d", -+ len, wbuf->lnum, wbuf->offs, err); -+ dbg_dump_node(c, buf); -+ dbg_dump_stack(); -+ dbg_dump_leb(c, wbuf->lnum); -+ return err; -+} -+ -+/** -+ * ubifs_write_node - write node to the media. 
-+ * @c: UBIFS file-system description object -+ * @buf: the node to write -+ * @len: node length -+ * @lnum: logical eraseblock number -+ * @offs: offset within the logical eraseblock -+ * @dtype: node life-time hint (%UBI_LONGTERM, %UBI_SHORTTERM, %UBI_UNKNOWN) -+ * -+ * This function automatically fills node magic number, assigns sequence -+ * number, and calculates node CRC checksum. The length of the @buf buffer has -+ * to be aligned to the minimal I/O unit size. This function automatically -+ * appends padding node and padding bytes if needed. Returns zero in case of -+ * success and a negative error code in case of failure. -+ */ -+int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum, -+ int offs, int dtype) -+{ -+ int err, buf_len = ALIGN(len, c->min_io_size); -+ -+ dbg_io("LEB %d:%d, %s, length %d (aligned %d)", -+ lnum, offs, dbg_ntype(((struct ubifs_ch *)buf)->node_type), len, -+ buf_len); -+ ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0); -+ ubifs_assert(offs % c->min_io_size == 0 && offs < c->leb_size); -+ -+ if (c->ro_media) -+ return -EROFS; -+ -+ ubifs_prepare_node(c, buf, len, 1); -+ err = ubi_leb_write(c->ubi, lnum, buf, offs, buf_len, dtype); -+ if (err) { -+ ubifs_err("cannot write %d bytes to LEB %d:%d, error %d", -+ buf_len, lnum, offs, err); -+ dbg_dump_node(c, buf); -+ dbg_dump_stack(); -+ } -+ -+ return err; -+} -+ -+/** -+ * ubifs_read_node_wbuf - read node from the media or write-buffer. -+ * @wbuf: wbuf to check for un-written data -+ * @buf: buffer to read to -+ * @type: node type -+ * @len: node length -+ * @lnum: logical eraseblock number -+ * @offs: offset within the logical eraseblock -+ * -+ * This function reads a node of known type and length, checks it and stores -+ * in @buf. If the node partially or fully sits in the write-buffer, this -+ * function takes data from the buffer, otherwise it reads the flash media. 
-+ * Returns zero in case of success, %-EUCLEAN if CRC mismatched and a negative -+ * error code in case of failure. -+ */ -+int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len, -+ int lnum, int offs) -+{ -+ const struct ubifs_info *c = wbuf->c; -+ int err, rlen, overlap; -+ struct ubifs_ch *ch = buf; -+ -+ dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len); -+ ubifs_assert(wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0); -+ ubifs_assert(!(offs & 7) && offs < c->leb_size); -+ ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT); -+ -+ spin_lock(&wbuf->lock); -+ overlap = (lnum == wbuf->lnum && offs + len > wbuf->offs); -+ if (!overlap) { -+ /* We may safely unlock the write-buffer and read the data */ -+ spin_unlock(&wbuf->lock); -+ return ubifs_read_node(c, buf, type, len, lnum, offs); -+ } -+ -+ /* Don't read under wbuf */ -+ rlen = wbuf->offs - offs; -+ if (rlen < 0) -+ rlen = 0; -+ -+ /* Copy the rest from the write-buffer */ -+ memcpy(buf + rlen, wbuf->buf + offs + rlen - wbuf->offs, len - rlen); -+ spin_unlock(&wbuf->lock); -+ -+ if (rlen > 0) { -+ /* Read everything that goes before write-buffer */ -+ err = ubi_read(c->ubi, lnum, buf, offs, rlen); -+ if (err && err != -EBADMSG) { -+ ubifs_err("failed to read node %d from LEB %d:%d, " -+ "error %d", type, lnum, offs, err); -+ dbg_dump_stack(); -+ return err; -+ } -+ } -+ -+ if (type != ch->node_type) { -+ ubifs_err("bad node type (%d but expected %d)", -+ ch->node_type, type); -+ goto out; -+ } -+ -+ err = ubifs_check_node(c, buf, lnum, offs, 0, 0); -+ if (err) { -+ ubifs_err("expected node type %d", type); -+ return err; -+ } -+ -+ rlen = le32_to_cpu(ch->len); -+ if (rlen != len) { -+ ubifs_err("bad node length %d, expected %d", rlen, len); -+ goto out; -+ } -+ -+ return 0; -+ -+out: -+ ubifs_err("bad node at LEB %d:%d", lnum, offs); -+ dbg_dump_node(c, buf); -+ dbg_dump_stack(); -+ return -EINVAL; -+} -+ -+/** -+ * ubifs_read_node - read node. 
-+ * @c: UBIFS file-system description object -+ * @buf: buffer to read to -+ * @type: node type -+ * @len: node length (not aligned) -+ * @lnum: logical eraseblock number -+ * @offs: offset within the logical eraseblock -+ * -+ * This function reads a node of known type and and length, checks it and -+ * stores in @buf. Returns zero in case of success, %-EUCLEAN if CRC mismatched -+ * and a negative error code in case of failure. -+ */ -+int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len, -+ int lnum, int offs) -+{ -+ int err, l; -+ struct ubifs_ch *ch = buf; -+ -+ dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len); -+ ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0); -+ ubifs_assert(len >= UBIFS_CH_SZ && offs + len <= c->leb_size); -+ ubifs_assert(!(offs & 7) && offs < c->leb_size); -+ ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT); -+ -+ err = ubi_read(c->ubi, lnum, buf, offs, len); -+ if (err && err != -EBADMSG) { -+ ubifs_err("cannot read node %d from LEB %d:%d, error %d", -+ type, lnum, offs, err); -+ return err; -+ } -+ -+ if (type != ch->node_type) { -+ ubifs_err("bad node type (%d but expected %d)", -+ ch->node_type, type); -+ goto out; -+ } -+ -+ err = ubifs_check_node(c, buf, lnum, offs, 0, 0); -+ if (err) { -+ ubifs_err("expected node type %d", type); -+ return err; -+ } -+ -+ l = le32_to_cpu(ch->len); -+ if (l != len) { -+ ubifs_err("bad node length %d, expected %d", l, len); -+ goto out; -+ } -+ -+ return 0; -+ -+out: -+ ubifs_err("bad node at LEB %d:%d", lnum, offs); -+ dbg_dump_node(c, buf); -+ dbg_dump_stack(); -+ return -EINVAL; -+} -+ -+/** -+ * ubifs_wbuf_init - initialize write-buffer. -+ * @c: UBIFS file-system description object -+ * @wbuf: write-buffer to initialize -+ * -+ * This function initializes write buffer. Returns zero in case of success -+ * %-ENOMEM in case of failure. 
-+ */ -+int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf) -+{ -+ size_t size; -+ -+ wbuf->buf = kmalloc(c->min_io_size, GFP_KERNEL); -+ if (!wbuf->buf) -+ return -ENOMEM; -+ -+ size = (c->min_io_size / UBIFS_CH_SZ + 1) * sizeof(ino_t); -+ wbuf->inodes = kmalloc(size, GFP_KERNEL); -+ if (!wbuf->inodes) { -+ kfree(wbuf->buf); -+ wbuf->buf = NULL; -+ return -ENOMEM; -+ } -+ -+ wbuf->used = 0; -+ wbuf->lnum = wbuf->offs = -1; -+ wbuf->avail = c->min_io_size; -+ wbuf->dtype = UBI_UNKNOWN; -+ wbuf->sync_callback = NULL; -+ mutex_init(&wbuf->io_mutex); -+ spin_lock_init(&wbuf->lock); -+ -+ wbuf->c = c; -+ init_timer(&wbuf->timer); -+ wbuf->timer.function = wbuf_timer_callback_nolock; -+ wbuf->timer.data = (unsigned long)wbuf; -+ wbuf->timeout = DEFAULT_WBUF_TIMEOUT; -+ wbuf->next_ino = 0; -+ -+ return 0; -+} -+ -+/** -+ * ubifs_wbuf_add_ino_nolock - add an inode number into the wbuf inode array. -+ * @wbuf: the write-buffer whereto add -+ * @inum: the inode number -+ * -+ * This function adds an inode number to the inode array of the write-buffer. -+ */ -+void ubifs_wbuf_add_ino_nolock(struct ubifs_wbuf *wbuf, ino_t inum) -+{ -+ if (!wbuf->buf) -+ /* NOR flash or something similar */ -+ return; -+ -+ spin_lock(&wbuf->lock); -+ if (wbuf->used) -+ wbuf->inodes[wbuf->next_ino++] = inum; -+ spin_unlock(&wbuf->lock); -+} -+ -+/** -+ * wbuf_has_ino - returns if the wbuf contains data from the inode. -+ * @wbuf: the write-buffer -+ * @inum: the inode number -+ * -+ * This function returns with %1 if the write-buffer contains some data from the -+ * given inode otherwise it returns with %0. -+ */ -+static int wbuf_has_ino(struct ubifs_wbuf *wbuf, ino_t inum) -+{ -+ int i, ret = 0; -+ -+ spin_lock(&wbuf->lock); -+ for (i = 0; i < wbuf->next_ino; i++) -+ if (inum == wbuf->inodes[i]) { -+ ret = 1; -+ break; -+ } -+ spin_unlock(&wbuf->lock); -+ -+ return ret; -+} -+ -+/** -+ * ubifs_sync_wbufs_by_inode - synchronize write-buffers for an inode. 
-+ * @c: UBIFS file-system description object -+ * @inode: inode to synchronize -+ * -+ * This function synchronizes write-buffers which contain nodes belonging to -+ * @inode. Returns zero in case of success and a negative error code in case of -+ * failure. -+ */ -+int ubifs_sync_wbufs_by_inode(struct ubifs_info *c, struct inode *inode) -+{ -+ int i, err = 0; -+ -+ for (i = 0; i < c->jhead_cnt; i++) { -+ struct ubifs_wbuf *wbuf = &c->jheads[i].wbuf; -+ -+ if (i == GCHD) -+ /* -+ * GC head is special, do not look at it. Even if the -+ * head contains something related to this inode, it is -+ * a _copy_ of corresponding on-flash node which sits -+ * somewhere else. -+ */ -+ continue; -+ -+ if (!wbuf_has_ino(wbuf, inode->i_ino)) -+ continue; -+ -+ mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); -+ if (wbuf_has_ino(wbuf, inode->i_ino)) -+ err = ubifs_wbuf_sync_nolock(wbuf); -+ mutex_unlock(&wbuf->io_mutex); -+ -+ if (err) { -+ ubifs_ro_mode(c, err); -+ return err; -+ } -+ } -+ return 0; -+} -diff -Nurd linux-2.6.24.orig/fs/ubifs/ioctl.c linux-2.6.24/fs/ubifs/ioctl.c ---- linux-2.6.24.orig/fs/ubifs/ioctl.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.24/fs/ubifs/ioctl.c 2009-04-17 09:49:28.000000000 +0200 -@@ -0,0 +1,196 @@ -+/* -+ * This file is part of UBIFS. -+ * -+ * Copyright (C) 2006-2008 Nokia Corporation. -+ * Copyright (C) 2006, 2007 University of Szeged, Hungary -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 as published by -+ * the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -+ * more details. 
-+ * -+ * You should have received a copy of the GNU General Public License along with -+ * this program; if not, write to the Free Software Foundation, Inc., 51 -+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ * -+ * Authors: Zoltan Sogor -+ * Artem Bityutskiy (Битюцкий Артём) -+ * Adrian Hunter -+ */ -+ -+/* This file implements EXT2-compatible extended attribute ioctl() calls */ -+ -+#include <linux/compat.h> -+#include <linux/smp_lock.h> -+#include "ubifs.h" -+ -+/** -+ * ubifs_set_inode_flags - set VFS inode flags. -+ * @inode: VFS inode to set flags for -+ * -+ * This function propagates flags from UBIFS inode object to VFS inode object. -+ */ -+void ubifs_set_inode_flags(struct inode *inode) -+{ -+ unsigned int flags = ubifs_inode(inode)->flags; -+ -+ inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | S_DIRSYNC); -+ if (flags & UBIFS_SYNC_FL) -+ inode->i_flags |= S_SYNC; -+ if (flags & UBIFS_APPEND_FL) -+ inode->i_flags |= S_APPEND; -+ if (flags & UBIFS_IMMUTABLE_FL) -+ inode->i_flags |= S_IMMUTABLE; -+ if (flags & UBIFS_DIRSYNC_FL) -+ inode->i_flags |= S_DIRSYNC; -+} -+ -+/* -+ * ioctl2ubifs - convert ioctl inode flags to UBIFS inode flags. -+ * @ioctl_flags: flags to convert -+ * -+ * This function convert ioctl flags (@FS_COMPR_FL, etc) to UBIFS inode flags -+ * (@UBIFS_COMPR_FL, etc). -+ */ -+static int ioctl2ubifs(int ioctl_flags) -+{ -+ int ubifs_flags = 0; -+ -+ if (ioctl_flags & FS_COMPR_FL) -+ ubifs_flags |= UBIFS_COMPR_FL; -+ if (ioctl_flags & FS_SYNC_FL) -+ ubifs_flags |= UBIFS_SYNC_FL; -+ if (ioctl_flags & FS_APPEND_FL) -+ ubifs_flags |= UBIFS_APPEND_FL; -+ if (ioctl_flags & FS_IMMUTABLE_FL) -+ ubifs_flags |= UBIFS_IMMUTABLE_FL; -+ if (ioctl_flags & FS_DIRSYNC_FL) -+ ubifs_flags |= UBIFS_DIRSYNC_FL; -+ -+ return ubifs_flags; -+} -+ -+/* -+ * ubifs2ioctl - convert UBIFS inode flags to ioctl inode flags. 
-+ * @ubifs_flags: flags to convert -+ * -+ * This function convert UBIFS (@UBIFS_COMPR_FL, etc) to ioctl flags -+ * (@FS_COMPR_FL, etc). -+ */ -+static int ubifs2ioctl(int ubifs_flags) -+{ -+ int ioctl_flags = 0; -+ -+ if (ubifs_flags & UBIFS_COMPR_FL) -+ ioctl_flags |= FS_COMPR_FL; -+ if (ubifs_flags & UBIFS_SYNC_FL) -+ ioctl_flags |= FS_SYNC_FL; -+ if (ubifs_flags & UBIFS_APPEND_FL) -+ ioctl_flags |= FS_APPEND_FL; -+ if (ubifs_flags & UBIFS_IMMUTABLE_FL) -+ ioctl_flags |= FS_IMMUTABLE_FL; -+ if (ubifs_flags & UBIFS_DIRSYNC_FL) -+ ioctl_flags |= FS_DIRSYNC_FL; -+ -+ return ioctl_flags; -+} -+ -+static int setflags(struct inode *inode, int flags) -+{ -+ int oldflags, err, release; -+ struct ubifs_inode *ui = ubifs_inode(inode); -+ struct ubifs_info *c = inode->i_sb->s_fs_info; -+ struct ubifs_budget_req req = { .dirtied_ino = 1, -+ .dirtied_ino_d = ui->data_len }; -+ -+ err = ubifs_budget_space(c, &req); -+ if (err) -+ return err; -+ -+ /* -+ * The IMMUTABLE and APPEND_ONLY flags can only be changed by -+ * the relevant capability. 
-+ */ -+ mutex_lock(&ui->ui_mutex); -+ oldflags = ubifs2ioctl(ui->flags); -+ if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { -+ if (!capable(CAP_LINUX_IMMUTABLE)) { -+ err = -EPERM; -+ goto out_unlock; -+ } -+ } -+ -+ ui->flags = ioctl2ubifs(flags); -+ ubifs_set_inode_flags(inode); -+ inode->i_ctime = ubifs_current_time(inode); -+ release = ui->dirty; -+ mark_inode_dirty_sync(inode); -+ mutex_unlock(&ui->ui_mutex); -+ -+ if (release) -+ ubifs_release_budget(c, &req); -+ if (IS_SYNC(inode)) -+ err = write_inode_now(inode, 1); -+ return err; -+ -+out_unlock: -+ ubifs_err("can't modify inode %lu attributes", inode->i_ino); -+ mutex_unlock(&ui->ui_mutex); -+ ubifs_release_budget(c, &req); -+ return err; -+} -+ -+long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -+{ -+ int flags; -+ struct inode *inode = file->f_path.dentry->d_inode; -+ -+ switch (cmd) { -+ case FS_IOC_GETFLAGS: -+ flags = ubifs2ioctl(ubifs_inode(inode)->flags); -+ -+ dbg_gen("get flags: %#x, i_flags %#x", flags, inode->i_flags); -+ return put_user(flags, (int __user *) arg); -+ -+ case FS_IOC_SETFLAGS: { -+ if (IS_RDONLY(inode)) -+ return -EROFS; -+ -+ if (!is_owner_or_cap(inode)) -+ return -EACCES; -+ -+ if (get_user(flags, (int __user *) arg)) -+ return -EFAULT; -+ -+ if (!S_ISDIR(inode->i_mode)) -+ flags &= ~FS_DIRSYNC_FL; -+ -+ dbg_gen("set flags: %#x, i_flags %#x", flags, inode->i_flags); -+ return setflags(inode, flags); -+ } -+ -+ default: -+ return -ENOTTY; -+ } -+} -+ -+#ifdef CONFIG_COMPAT -+long ubifs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -+{ -+ switch (cmd) { -+ case FS_IOC32_GETFLAGS: -+ cmd = FS_IOC_GETFLAGS; -+ break; -+ case FS_IOC32_SETFLAGS: -+ cmd = FS_IOC_SETFLAGS; -+ break; -+ default: -+ return -ENOIOCTLCMD; -+ } -+ return ubifs_ioctl(file, cmd, (unsigned long)compat_ptr(arg)); -+} -+#endif -diff -Nurd linux-2.6.24.orig/fs/ubifs/journal.c linux-2.6.24/fs/ubifs/journal.c ---- linux-2.6.24.orig/fs/ubifs/journal.c 
1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.24/fs/ubifs/journal.c 2009-04-17 09:49:28.000000000 +0200 -@@ -0,0 +1,1443 @@ -+/* -+ * This file is part of UBIFS. -+ * -+ * Copyright (C) 2006-2008 Nokia Corporation. -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 as published by -+ * the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -+ * more details. -+ * -+ * You should have received a copy of the GNU General Public License along with -+ * this program; if not, write to the Free Software Foundation, Inc., 51 -+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ * -+ * Authors: Artem Bityutskiy (Битюцкий Артём) -+ * Adrian Hunter -+ */ -+ -+/* -+ * This file implements UBIFS journal. -+ * -+ * The journal consists of 2 parts - the log and bud LEBs. The log has fixed -+ * length and position, while a bud logical eraseblock is any LEB in the main -+ * area. Buds contain file system data - data nodes, inode nodes, etc. The log -+ * contains only references to buds and some other stuff like commit -+ * start node. The idea is that when we commit the journal, we do -+ * not copy the data, the buds just become indexed. Since after the commit the -+ * nodes in bud eraseblocks become leaf nodes of the file system index tree, we -+ * use term "bud". Analogy is obvious, bud eraseblocks contain nodes which will -+ * become leafs in the future. -+ * -+ * The journal is multi-headed because we want to write data to the journal as -+ * optimally as possible. It is nice to have nodes belonging to the same inode -+ * in one LEB, so we may write data owned by different inodes to different -+ * journal heads, although at present only one data head is used. 
-+ * -+ * For recovery reasons, the base head contains all inode nodes, all directory -+ * entry nodes and all truncate nodes. This means that the other heads contain -+ * only data nodes. -+ * -+ * Bud LEBs may be half-indexed. For example, if the bud was not full at the -+ * time of commit, the bud is retained to continue to be used in the journal, -+ * even though the "front" of the LEB is now indexed. In that case, the log -+ * reference contains the offset where the bud starts for the purposes of the -+ * journal. -+ * -+ * The journal size has to be limited, because the larger is the journal, the -+ * longer it takes to mount UBIFS (scanning the journal) and the more memory it -+ * takes (indexing in the TNC). -+ * -+ * All the journal write operations like 'ubifs_jnl_update()' here, which write -+ * multiple UBIFS nodes to the journal at one go, are atomic with respect to -+ * unclean reboots. Should the unclean reboot happen, the recovery code drops -+ * all the nodes. -+ */ -+ -+#include "ubifs.h" -+ -+/** -+ * zero_ino_node_unused - zero out unused fields of an on-flash inode node. -+ * @ino: the inode to zero out -+ */ -+static inline void zero_ino_node_unused(struct ubifs_ino_node *ino) -+{ -+ memset(ino->padding1, 0, 4); -+ memset(ino->padding2, 0, 26); -+} -+ -+/** -+ * zero_dent_node_unused - zero out unused fields of an on-flash directory -+ * entry node. -+ * @dent: the directory entry to zero out -+ */ -+static inline void zero_dent_node_unused(struct ubifs_dent_node *dent) -+{ -+ dent->padding1 = 0; -+ memset(dent->padding2, 0, 4); -+} -+ -+/** -+ * zero_data_node_unused - zero out unused fields of an on-flash data node. -+ * @data: the data node to zero out -+ */ -+static inline void zero_data_node_unused(struct ubifs_data_node *data) -+{ -+ memset(data->padding, 0, 2); -+} -+ -+/** -+ * zero_trun_node_unused - zero out unused fields of an on-flash truncation -+ * node. 
-+ * @trun: the truncation node to zero out -+ */ -+static inline void zero_trun_node_unused(struct ubifs_trun_node *trun) -+{ -+ memset(trun->padding, 0, 12); -+} -+ -+/** -+ * reserve_space - reserve space in the journal. -+ * @c: UBIFS file-system description object -+ * @jhead: journal head number -+ * @len: node length -+ * -+ * This function reserves space in journal head @head. If the reservation -+ * succeeded, the journal head stays locked and later has to be unlocked using -+ * 'release_head()'. 'write_node()' and 'write_head()' functions also unlock -+ * it. Returns zero in case of success, %-EAGAIN if commit has to be done, and -+ * other negative error codes in case of other failures. -+ */ -+static int reserve_space(struct ubifs_info *c, int jhead, int len) -+{ -+ int err = 0, err1, retries = 0, avail, lnum, offs, free, squeeze; -+ struct ubifs_wbuf *wbuf = &c->jheads[jhead].wbuf; -+ -+ /* -+ * Typically, the base head has smaller nodes written to it, so it is -+ * better to try to allocate space at the ends of eraseblocks. This is -+ * what the squeeze parameter does. -+ */ -+ squeeze = (jhead == BASEHD); -+again: -+ mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); -+ -+ if (c->ro_media) { -+ err = -EROFS; -+ goto out_unlock; -+ } -+ -+ avail = c->leb_size - wbuf->offs - wbuf->used; -+ if (wbuf->lnum != -1 && avail >= len) -+ return 0; -+ -+ /* -+ * Write buffer wasn't seek'ed or there is no enough space - look for an -+ * LEB with some empty space. -+ */ -+ lnum = ubifs_find_free_space(c, len, &free, squeeze); -+ if (lnum >= 0) { -+ /* Found an LEB, add it to the journal head */ -+ offs = c->leb_size - free; -+ err = ubifs_add_bud_to_log(c, jhead, lnum, offs); -+ if (err) -+ goto out_return; -+ /* A new bud was successfully allocated and added to the log */ -+ goto out; -+ } -+ -+ err = lnum; -+ if (err != -ENOSPC) -+ goto out_unlock; -+ -+ /* -+ * No free space, we have to run garbage collector to make -+ * some. 
But the write-buffer mutex has to be unlocked because -+ * GC also takes it. -+ */ -+ dbg_jnl("no free space jhead %d, run GC", jhead); -+ mutex_unlock(&wbuf->io_mutex); -+ -+ lnum = ubifs_garbage_collect(c, 0); -+ if (lnum < 0) { -+ err = lnum; -+ if (err != -ENOSPC) -+ return err; -+ -+ /* -+ * GC could not make a free LEB. But someone else may -+ * have allocated new bud for this journal head, -+ * because we dropped @wbuf->io_mutex, so try once -+ * again. -+ */ -+ dbg_jnl("GC couldn't make a free LEB for jhead %d", jhead); -+ if (retries++ < 2) { -+ dbg_jnl("retry (%d)", retries); -+ goto again; -+ } -+ -+ dbg_jnl("return -ENOSPC"); -+ return err; -+ } -+ -+ mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); -+ dbg_jnl("got LEB %d for jhead %d", lnum, jhead); -+ avail = c->leb_size - wbuf->offs - wbuf->used; -+ -+ if (wbuf->lnum != -1 && avail >= len) { -+ /* -+ * Someone else has switched the journal head and we have -+ * enough space now. This happens when more then one process is -+ * trying to write to the same journal head at the same time. -+ */ -+ dbg_jnl("return LEB %d back, already have LEB %d:%d", -+ lnum, wbuf->lnum, wbuf->offs + wbuf->used); -+ err = ubifs_return_leb(c, lnum); -+ if (err) -+ goto out_unlock; -+ return 0; -+ } -+ -+ err = ubifs_add_bud_to_log(c, jhead, lnum, 0); -+ if (err) -+ goto out_return; -+ offs = 0; -+ -+out: -+ err = ubifs_wbuf_seek_nolock(wbuf, lnum, offs, wbuf->dtype); -+ if (err) -+ goto out_unlock; -+ -+ return 0; -+ -+out_unlock: -+ mutex_unlock(&wbuf->io_mutex); -+ return err; -+ -+out_return: -+ /* An error occurred and the LEB has to be returned to lprops */ -+ ubifs_assert(err < 0); -+ err1 = ubifs_return_leb(c, lnum); -+ if (err1 && err == -EAGAIN) -+ /* -+ * Return original error code only if it is not %-EAGAIN, -+ * which is not really an error. Otherwise, return the error -+ * code of 'ubifs_return_leb()'. 
-+ */ -+ err = err1; -+ mutex_unlock(&wbuf->io_mutex); -+ return err; -+} -+ -+/** -+ * write_node - write node to a journal head. -+ * @c: UBIFS file-system description object -+ * @jhead: journal head -+ * @node: node to write -+ * @len: node length -+ * @lnum: LEB number written is returned here -+ * @offs: offset written is returned here -+ * -+ * This function writes a node to reserved space of journal head @jhead. -+ * Returns zero in case of success and a negative error code in case of -+ * failure. -+ */ -+static int write_node(struct ubifs_info *c, int jhead, void *node, int len, -+ int *lnum, int *offs) -+{ -+ struct ubifs_wbuf *wbuf = &c->jheads[jhead].wbuf; -+ -+ ubifs_assert(jhead != GCHD); -+ -+ *lnum = c->jheads[jhead].wbuf.lnum; -+ *offs = c->jheads[jhead].wbuf.offs + c->jheads[jhead].wbuf.used; -+ -+ dbg_jnl("jhead %d, LEB %d:%d, len %d", jhead, *lnum, *offs, len); -+ ubifs_prepare_node(c, node, len, 0); -+ -+ return ubifs_wbuf_write_nolock(wbuf, node, len); -+} -+ -+/** -+ * write_head - write data to a journal head. -+ * @c: UBIFS file-system description object -+ * @jhead: journal head -+ * @buf: buffer to write -+ * @len: length to write -+ * @lnum: LEB number written is returned here -+ * @offs: offset written is returned here -+ * @sync: non-zero if the write-buffer has to by synchronized -+ * -+ * This function is the same as 'write_node()' but it does not assume the -+ * buffer it is writing is a node, so it does not prepare it (which means -+ * initializing common header and calculating CRC). 
-+ */ -+static int write_head(struct ubifs_info *c, int jhead, void *buf, int len, -+ int *lnum, int *offs, int sync) -+{ -+ int err; -+ struct ubifs_wbuf *wbuf = &c->jheads[jhead].wbuf; -+ -+ ubifs_assert(jhead != GCHD); -+ -+ *lnum = c->jheads[jhead].wbuf.lnum; -+ *offs = c->jheads[jhead].wbuf.offs + c->jheads[jhead].wbuf.used; -+ dbg_jnl("jhead %d, LEB %d:%d, len %d", jhead, *lnum, *offs, len); -+ -+ err = ubifs_wbuf_write_nolock(wbuf, buf, len); -+ if (err) -+ return err; -+ if (sync) -+ err = ubifs_wbuf_sync_nolock(wbuf); -+ return err; -+} -+ -+/** -+ * make_reservation - reserve journal space. -+ * @c: UBIFS file-system description object -+ * @jhead: journal head -+ * @len: how many bytes to reserve -+ * -+ * This function makes space reservation in journal head @jhead. The function -+ * takes the commit lock and locks the journal head, and the caller has to -+ * unlock the head and finish the reservation with 'finish_reservation()'. -+ * Returns zero in case of success and a negative error code in case of -+ * failure. -+ * -+ * Note, the journal head may be unlocked as soon as the data is written, while -+ * the commit lock has to be released after the data has been added to the -+ * TNC. -+ */ -+static int make_reservation(struct ubifs_info *c, int jhead, int len) -+{ -+ int err, cmt_retries = 0, nospc_retries = 0; -+ -+again: -+ down_read(&c->commit_sem); -+ err = reserve_space(c, jhead, len); -+ if (!err) -+ return 0; -+ up_read(&c->commit_sem); -+ -+ if (err == -ENOSPC) { -+ /* -+ * GC could not make any progress. We should try to commit -+ * once because it could make some dirty space and GC would -+ * make progress, so make the error -EAGAIN so that the below -+ * will commit and re-try. -+ */ -+ if (nospc_retries++ < 2) { -+ dbg_jnl("no space, retry"); -+ err = -EAGAIN; -+ } -+ -+ /* -+ * This means that the budgeting is incorrect. We always have -+ * to be able to write to the media, because all operations are -+ * budgeted. 
Deletions are not budgeted, though, but we reserve -+ * an extra LEB for them. -+ */ -+ } -+ -+ if (err != -EAGAIN) -+ goto out; -+ -+ /* -+ * -EAGAIN means that the journal is full or too large, or the above -+ * code wants to do one commit. Do this and re-try. -+ */ -+ if (cmt_retries > 128) { -+ /* -+ * This should not happen unless the journal size limitations -+ * are too tough. -+ */ -+ ubifs_err("stuck in space allocation"); -+ err = -ENOSPC; -+ goto out; -+ } else if (cmt_retries > 32) -+ ubifs_warn("too many space allocation re-tries (%d)", -+ cmt_retries); -+ -+ dbg_jnl("-EAGAIN, commit and retry (retried %d times)", -+ cmt_retries); -+ cmt_retries += 1; -+ -+ err = ubifs_run_commit(c); -+ if (err) -+ return err; -+ goto again; -+ -+out: -+ ubifs_err("cannot reserve %d bytes in jhead %d, error %d", -+ len, jhead, err); -+ if (err == -ENOSPC) { -+ /* This are some budgeting problems, print useful information */ -+ down_write(&c->commit_sem); -+ spin_lock(&c->space_lock); -+ dbg_dump_stack(); -+ dbg_dump_budg(c); -+ spin_unlock(&c->space_lock); -+ dbg_dump_lprops(c); -+ cmt_retries = dbg_check_lprops(c); -+ up_write(&c->commit_sem); -+ } -+ return err; -+} -+ -+/** -+ * release_head - release a journal head. -+ * @c: UBIFS file-system description object -+ * @jhead: journal head -+ * -+ * This function releases journal head @jhead which was locked by -+ * the 'make_reservation()' function. It has to be called after each successful -+ * 'make_reservation()' invocation. -+ */ -+static inline void release_head(struct ubifs_info *c, int jhead) -+{ -+ mutex_unlock(&c->jheads[jhead].wbuf.io_mutex); -+} -+ -+/** -+ * finish_reservation - finish a reservation. -+ * @c: UBIFS file-system description object -+ * -+ * This function finishes journal space reservation. It must be called after -+ * 'make_reservation()'. 
-+ */ -+static void finish_reservation(struct ubifs_info *c) -+{ -+ up_read(&c->commit_sem); -+} -+ -+/** -+ * get_dent_type - translate VFS inode mode to UBIFS directory entry type. -+ * @mode: inode mode -+ */ -+static int get_dent_type(int mode) -+{ -+ switch (mode & S_IFMT) { -+ case S_IFREG: -+ return UBIFS_ITYPE_REG; -+ case S_IFDIR: -+ return UBIFS_ITYPE_DIR; -+ case S_IFLNK: -+ return UBIFS_ITYPE_LNK; -+ case S_IFBLK: -+ return UBIFS_ITYPE_BLK; -+ case S_IFCHR: -+ return UBIFS_ITYPE_CHR; -+ case S_IFIFO: -+ return UBIFS_ITYPE_FIFO; -+ case S_IFSOCK: -+ return UBIFS_ITYPE_SOCK; -+ default: -+ BUG(); -+ } -+ return 0; -+} -+ -+/** -+ * pack_inode - pack an inode node. -+ * @c: UBIFS file-system description object -+ * @ino: buffer in which to pack inode node -+ * @inode: inode to pack -+ * @last: indicates the last node of the group -+ */ -+static void pack_inode(struct ubifs_info *c, struct ubifs_ino_node *ino, -+ const struct inode *inode, int last) -+{ -+ int data_len = 0, last_reference = !inode->i_nlink; -+ struct ubifs_inode *ui = ubifs_inode(inode); -+ -+ ino->ch.node_type = UBIFS_INO_NODE; -+ ino_key_init_flash(c, &ino->key, inode->i_ino); -+ ino->creat_sqnum = cpu_to_le64(ui->creat_sqnum); -+ ino->atime_sec = cpu_to_le64(inode->i_atime.tv_sec); -+ ino->atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec); -+ ino->ctime_sec = cpu_to_le64(inode->i_ctime.tv_sec); -+ ino->ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); -+ ino->mtime_sec = cpu_to_le64(inode->i_mtime.tv_sec); -+ ino->mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); -+ ino->uid = cpu_to_le32(inode->i_uid); -+ ino->gid = cpu_to_le32(inode->i_gid); -+ ino->mode = cpu_to_le32(inode->i_mode); -+ ino->flags = cpu_to_le32(ui->flags); -+ ino->size = cpu_to_le64(ui->ui_size); -+ ino->nlink = cpu_to_le32(inode->i_nlink); -+ ino->compr_type = cpu_to_le16(ui->compr_type); -+ ino->data_len = cpu_to_le32(ui->data_len); -+ ino->xattr_cnt = cpu_to_le32(ui->xattr_cnt); -+ ino->xattr_size = 
cpu_to_le32(ui->xattr_size); -+ ino->xattr_names = cpu_to_le32(ui->xattr_names); -+ zero_ino_node_unused(ino); -+ -+ /* -+ * Drop the attached data if this is a deletion inode, the data is not -+ * needed anymore. -+ */ -+ if (!last_reference) { -+ memcpy(ino->data, ui->data, ui->data_len); -+ data_len = ui->data_len; -+ } -+ -+ ubifs_prep_grp_node(c, ino, UBIFS_INO_NODE_SZ + data_len, last); -+} -+ -+/** -+ * mark_inode_clean - mark UBIFS inode as clean. -+ * @c: UBIFS file-system description object -+ * @ui: UBIFS inode to mark as clean -+ * -+ * This helper function marks UBIFS inode @ui as clean by cleaning the -+ * @ui->dirty flag and releasing its budget. Note, VFS may still treat the -+ * inode as dirty and try to write it back, but 'ubifs_write_inode()' would -+ * just do nothing. -+ */ -+static void mark_inode_clean(struct ubifs_info *c, struct ubifs_inode *ui) -+{ -+ if (ui->dirty) -+ ubifs_release_dirty_inode_budget(c, ui); -+ ui->dirty = 0; -+} -+ -+/** -+ * ubifs_jnl_update - update inode. -+ * @c: UBIFS file-system description object -+ * @dir: parent inode or host inode in case of extended attributes -+ * @nm: directory entry name -+ * @inode: inode to update -+ * @deletion: indicates a directory entry deletion i.e unlink or rmdir -+ * @xent: non-zero if the directory entry is an extended attribute entry -+ * -+ * This function updates an inode by writing a directory entry (or extended -+ * attribute entry), the inode itself, and the parent directory inode (or the -+ * host inode) to the journal. -+ * -+ * The function writes the host inode @dir last, which is important in case of -+ * extended attributes. Indeed, then we guarantee that if the host inode gets -+ * synchronized (with 'fsync()'), and the write-buffer it sits in gets flushed, -+ * the extended attribute inode gets flushed too. And this is exactly what the -+ * user expects - synchronizing the host inode synchronizes its extended -+ * attributes. 
Similarly, this guarantees that if @dir is synchronized, its -+ * directory entry corresponding to @nm gets synchronized too. -+ * -+ * If the inode (@inode) or the parent directory (@dir) are synchronous, this -+ * function synchronizes the write-buffer. -+ * -+ * This function marks the @dir and @inode inodes as clean and returns zero on -+ * success. In case of failure, a negative error code is returned. -+ */ -+int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, -+ const struct qstr *nm, const struct inode *inode, -+ int deletion, int xent) -+{ -+ int err, dlen, ilen, len, lnum, ino_offs, dent_offs; -+ int aligned_dlen, aligned_ilen, sync = IS_DIRSYNC(dir); -+ int last_reference = !!(deletion && inode->i_nlink == 0); -+ struct ubifs_inode *ui = ubifs_inode(inode); -+ struct ubifs_inode *dir_ui = ubifs_inode(dir); -+ struct ubifs_dent_node *dent; -+ struct ubifs_ino_node *ino; -+ union ubifs_key dent_key, ino_key; -+ -+ dbg_jnl("ino %lu, dent '%.*s', data len %d in dir ino %lu", -+ inode->i_ino, nm->len, nm->name, ui->data_len, dir->i_ino); -+ ubifs_assert(dir_ui->data_len == 0); -+ ubifs_assert(mutex_is_locked(&dir_ui->ui_mutex)); -+ -+ dlen = UBIFS_DENT_NODE_SZ + nm->len + 1; -+ ilen = UBIFS_INO_NODE_SZ; -+ -+ /* -+ * If the last reference to the inode is being deleted, then there is -+ * no need to attach and write inode data, it is being deleted anyway. -+ * And if the inode is being deleted, no need to synchronize -+ * write-buffer even if the inode is synchronous. 
-+ */ -+ if (!last_reference) { -+ ilen += ui->data_len; -+ sync |= IS_SYNC(inode); -+ } -+ -+ aligned_dlen = ALIGN(dlen, 8); -+ aligned_ilen = ALIGN(ilen, 8); -+ len = aligned_dlen + aligned_ilen + UBIFS_INO_NODE_SZ; -+ dent = kmalloc(len, GFP_NOFS); -+ if (!dent) -+ return -ENOMEM; -+ -+ /* Make reservation before allocating sequence numbers */ -+ err = make_reservation(c, BASEHD, len); -+ if (err) -+ goto out_free; -+ -+ if (!xent) { -+ dent->ch.node_type = UBIFS_DENT_NODE; -+ dent_key_init(c, &dent_key, dir->i_ino, nm); -+ } else { -+ dent->ch.node_type = UBIFS_XENT_NODE; -+ xent_key_init(c, &dent_key, dir->i_ino, nm); -+ } -+ -+ key_write(c, &dent_key, dent->key); -+ dent->inum = deletion ? 0 : cpu_to_le64(inode->i_ino); -+ dent->type = get_dent_type(inode->i_mode); -+ dent->nlen = cpu_to_le16(nm->len); -+ memcpy(dent->name, nm->name, nm->len); -+ dent->name[nm->len] = '\0'; -+ zero_dent_node_unused(dent); -+ ubifs_prep_grp_node(c, dent, dlen, 0); -+ -+ ino = (void *)dent + aligned_dlen; -+ pack_inode(c, ino, inode, 0); -+ ino = (void *)ino + aligned_ilen; -+ pack_inode(c, ino, dir, 1); -+ -+ if (last_reference) { -+ err = ubifs_add_orphan(c, inode->i_ino); -+ if (err) { -+ release_head(c, BASEHD); -+ goto out_finish; -+ } -+ ui->del_cmtno = c->cmt_no; -+ } -+ -+ err = write_head(c, BASEHD, dent, len, &lnum, &dent_offs, sync); -+ if (err) -+ goto out_release; -+ if (!sync) { -+ struct ubifs_wbuf *wbuf = &c->jheads[BASEHD].wbuf; -+ -+ ubifs_wbuf_add_ino_nolock(wbuf, inode->i_ino); -+ ubifs_wbuf_add_ino_nolock(wbuf, dir->i_ino); -+ } -+ release_head(c, BASEHD); -+ kfree(dent); -+ -+ if (deletion) { -+ err = ubifs_tnc_remove_nm(c, &dent_key, nm); -+ if (err) -+ goto out_ro; -+ err = ubifs_add_dirt(c, lnum, dlen); -+ } else -+ err = ubifs_tnc_add_nm(c, &dent_key, lnum, dent_offs, dlen, nm); -+ if (err) -+ goto out_ro; -+ -+ /* -+ * Note, we do not remove the inode from TNC even if the last reference -+ * to it has just been deleted, because the inode may still be 
opened. -+ * Instead, the inode has been added to orphan lists and the orphan -+ * subsystem will take further care about it. -+ */ -+ ino_key_init(c, &ino_key, inode->i_ino); -+ ino_offs = dent_offs + aligned_dlen; -+ err = ubifs_tnc_add(c, &ino_key, lnum, ino_offs, ilen); -+ if (err) -+ goto out_ro; -+ -+ ino_key_init(c, &ino_key, dir->i_ino); -+ ino_offs += aligned_ilen; -+ err = ubifs_tnc_add(c, &ino_key, lnum, ino_offs, UBIFS_INO_NODE_SZ); -+ if (err) -+ goto out_ro; -+ -+ finish_reservation(c); -+ spin_lock(&ui->ui_lock); -+ ui->synced_i_size = ui->ui_size; -+ spin_unlock(&ui->ui_lock); -+ mark_inode_clean(c, ui); -+ mark_inode_clean(c, dir_ui); -+ return 0; -+ -+out_finish: -+ finish_reservation(c); -+out_free: -+ kfree(dent); -+ return err; -+ -+out_release: -+ release_head(c, BASEHD); -+out_ro: -+ ubifs_ro_mode(c, err); -+ if (last_reference) -+ ubifs_delete_orphan(c, inode->i_ino); -+ finish_reservation(c); -+ return err; -+} -+ -+/** -+ * ubifs_jnl_write_data - write a data node to the journal. -+ * @c: UBIFS file-system description object -+ * @inode: inode the data node belongs to -+ * @key: node key -+ * @buf: buffer to write -+ * @len: data length (must not exceed %UBIFS_BLOCK_SIZE) -+ * -+ * This function writes a data node to the journal. Returns %0 if the data node -+ * was successfully written, and a negative error code in case of failure. 
-+ */ -+int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, -+ const union ubifs_key *key, const void *buf, int len) -+{ -+ struct ubifs_data_node *data; -+ int err, lnum, offs, compr_type, out_len; -+ int dlen = UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR; -+ struct ubifs_inode *ui = ubifs_inode(inode); -+ -+ dbg_jnl("ino %lu, blk %u, len %d, key %s", -+ (unsigned long)key_inum(c, key), key_block(c, key), len, -+ DBGKEY(key)); -+ ubifs_assert(len <= UBIFS_BLOCK_SIZE); -+ -+ data = kmalloc(dlen, GFP_NOFS); -+ if (!data) -+ return -ENOMEM; -+ -+ data->ch.node_type = UBIFS_DATA_NODE; -+ key_write(c, key, &data->key); -+ data->size = cpu_to_le32(len); -+ zero_data_node_unused(data); -+ -+ if (!(ui->flags & UBIFS_COMPR_FL)) -+ /* Compression is disabled for this inode */ -+ compr_type = UBIFS_COMPR_NONE; -+ else -+ compr_type = ui->compr_type; -+ -+ out_len = dlen - UBIFS_DATA_NODE_SZ; -+ ubifs_compress(buf, len, &data->data, &out_len, &compr_type); -+ ubifs_assert(out_len <= UBIFS_BLOCK_SIZE); -+ -+ dlen = UBIFS_DATA_NODE_SZ + out_len; -+ data->compr_type = cpu_to_le16(compr_type); -+ -+ /* Make reservation before allocating sequence numbers */ -+ err = make_reservation(c, DATAHD, dlen); -+ if (err) -+ goto out_free; -+ -+ err = write_node(c, DATAHD, data, dlen, &lnum, &offs); -+ if (err) -+ goto out_release; -+ ubifs_wbuf_add_ino_nolock(&c->jheads[DATAHD].wbuf, key_inum(c, key)); -+ release_head(c, DATAHD); -+ -+ err = ubifs_tnc_add(c, key, lnum, offs, dlen); -+ if (err) -+ goto out_ro; -+ -+ finish_reservation(c); -+ kfree(data); -+ return 0; -+ -+out_release: -+ release_head(c, DATAHD); -+out_ro: -+ ubifs_ro_mode(c, err); -+ finish_reservation(c); -+out_free: -+ kfree(data); -+ return err; -+} -+ -+/** -+ * ubifs_jnl_write_inode - flush inode to the journal. -+ * @c: UBIFS file-system description object -+ * @inode: inode to flush -+ * -+ * This function writes inode @inode to the journal. 
If the inode is -+ * synchronous, it also synchronizes the write-buffer. Returns zero in case of -+ * success and a negative error code in case of failure. -+ */ -+int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode) -+{ -+ int err, lnum, offs; -+ struct ubifs_ino_node *ino; -+ struct ubifs_inode *ui = ubifs_inode(inode); -+ int sync = 0, len = UBIFS_INO_NODE_SZ, last_reference = !inode->i_nlink; -+ -+ dbg_jnl("ino %lu, nlink %u", inode->i_ino, inode->i_nlink); -+ -+ /* -+ * If the inode is being deleted, do not write the attached data. No -+ * need to synchronize the write-buffer either. -+ */ -+ if (!last_reference) { -+ len += ui->data_len; -+ sync = IS_SYNC(inode); -+ } -+ ino = kmalloc(len, GFP_NOFS); -+ if (!ino) -+ return -ENOMEM; -+ -+ /* Make reservation before allocating sequence numbers */ -+ err = make_reservation(c, BASEHD, len); -+ if (err) -+ goto out_free; -+ -+ pack_inode(c, ino, inode, 1); -+ err = write_head(c, BASEHD, ino, len, &lnum, &offs, sync); -+ if (err) -+ goto out_release; -+ if (!sync) -+ ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf, -+ inode->i_ino); -+ release_head(c, BASEHD); -+ -+ if (last_reference) { -+ err = ubifs_tnc_remove_ino(c, inode->i_ino); -+ if (err) -+ goto out_ro; -+ ubifs_delete_orphan(c, inode->i_ino); -+ err = ubifs_add_dirt(c, lnum, len); -+ } else { -+ union ubifs_key key; -+ -+ ino_key_init(c, &key, inode->i_ino); -+ err = ubifs_tnc_add(c, &key, lnum, offs, len); -+ } -+ if (err) -+ goto out_ro; -+ -+ finish_reservation(c); -+ spin_lock(&ui->ui_lock); -+ ui->synced_i_size = ui->ui_size; -+ spin_unlock(&ui->ui_lock); -+ kfree(ino); -+ return 0; -+ -+out_release: -+ release_head(c, BASEHD); -+out_ro: -+ ubifs_ro_mode(c, err); -+ finish_reservation(c); -+out_free: -+ kfree(ino); -+ return err; -+} -+ -+/** -+ * ubifs_jnl_delete_inode - delete an inode. 
-+ * @c: UBIFS file-system description object -+ * @inode: inode to delete -+ * -+ * This function deletes inode @inode which includes removing it from orphans, -+ * deleting it from TNC and, in some cases, writing a deletion inode to the -+ * journal. -+ * -+ * When regular file inodes are unlinked or a directory inode is removed, the -+ * 'ubifs_jnl_update()' function writes a corresponding deletion inode and -+ * direntry to the media, and adds the inode to orphans. After this, when the -+ * last reference to this inode has been dropped, this function is called. In -+ * general, it has to write one more deletion inode to the media, because if -+ * a commit happened between 'ubifs_jnl_update()' and -+ * 'ubifs_jnl_delete_inode()', the deletion inode is not in the journal -+ * anymore, and in fact it might not be on the flash anymore, because it might -+ * have been garbage-collected already. And for optimization reasons UBIFS does -+ * not read the orphan area if it has been unmounted cleanly, so it would have -+ * no indication in the journal that there is a deleted inode which has to be -+ * removed from TNC. -+ * -+ * However, if there was no commit between 'ubifs_jnl_update()' and -+ * 'ubifs_jnl_delete_inode()', then there is no need to write the deletion -+ * inode to the media for the second time. And this is quite a typical case. -+ * -+ * This function returns zero in case of success and a negative error code in -+ * case of failure. -+ */ -+int ubifs_jnl_delete_inode(struct ubifs_info *c, const struct inode *inode) -+{ -+ int err; -+ struct ubifs_inode *ui = ubifs_inode(inode); -+ -+ ubifs_assert(inode->i_nlink == 0); -+ -+ if (ui->del_cmtno != c->cmt_no) -+ /* A commit happened for sure */ -+ return ubifs_jnl_write_inode(c, inode); -+ -+ down_read(&c->commit_sem); -+ /* -+ * Check commit number again, because the first test has been done -+ * without @c->commit_sem, so a commit might have happened. 
-+ */ -+ if (ui->del_cmtno != c->cmt_no) { -+ up_read(&c->commit_sem); -+ return ubifs_jnl_write_inode(c, inode); -+ } -+ -+ err = ubifs_tnc_remove_ino(c, inode->i_ino); -+ if (err) -+ ubifs_ro_mode(c, err); -+ else -+ ubifs_delete_orphan(c, inode->i_ino); -+ up_read(&c->commit_sem); -+ return err; -+} -+ -+/** -+ * ubifs_jnl_rename - rename a directory entry. -+ * @c: UBIFS file-system description object -+ * @old_dir: parent inode of directory entry to rename -+ * @old_dentry: directory entry to rename -+ * @new_dir: parent inode of directory entry to rename -+ * @new_dentry: new directory entry (or directory entry to replace) -+ * @sync: non-zero if the write-buffer has to be synchronized -+ * -+ * This function implements the re-name operation which may involve writing up -+ * to 3 inodes and 2 directory entries. It marks the written inodes as clean -+ * and returns zero on success. In case of failure, a negative error code is -+ * returned. -+ */ -+int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, -+ const struct dentry *old_dentry, -+ const struct inode *new_dir, -+ const struct dentry *new_dentry, int sync) -+{ -+ void *p; -+ union ubifs_key key; -+ struct ubifs_dent_node *dent, *dent2; -+ int err, dlen1, dlen2, ilen, lnum, offs, len; -+ const struct inode *old_inode = old_dentry->d_inode; -+ const struct inode *new_inode = new_dentry->d_inode; -+ int aligned_dlen1, aligned_dlen2, plen = UBIFS_INO_NODE_SZ; -+ int last_reference = !!(new_inode && new_inode->i_nlink == 0); -+ int move = (old_dir != new_dir); -+ struct ubifs_inode *uninitialized_var(new_ui); -+ -+ dbg_jnl("dent '%.*s' in dir ino %lu to dent '%.*s' in dir ino %lu", -+ old_dentry->d_name.len, old_dentry->d_name.name, -+ old_dir->i_ino, new_dentry->d_name.len, -+ new_dentry->d_name.name, new_dir->i_ino); -+ ubifs_assert(ubifs_inode(old_dir)->data_len == 0); -+ ubifs_assert(ubifs_inode(new_dir)->data_len == 0); -+ ubifs_assert(mutex_is_locked(&ubifs_inode(old_dir)->ui_mutex)); 
-+ ubifs_assert(mutex_is_locked(&ubifs_inode(new_dir)->ui_mutex)); -+ -+ dlen1 = UBIFS_DENT_NODE_SZ + new_dentry->d_name.len + 1; -+ dlen2 = UBIFS_DENT_NODE_SZ + old_dentry->d_name.len + 1; -+ if (new_inode) { -+ new_ui = ubifs_inode(new_inode); -+ ubifs_assert(mutex_is_locked(&new_ui->ui_mutex)); -+ ilen = UBIFS_INO_NODE_SZ; -+ if (!last_reference) -+ ilen += new_ui->data_len; -+ } else -+ ilen = 0; -+ -+ aligned_dlen1 = ALIGN(dlen1, 8); -+ aligned_dlen2 = ALIGN(dlen2, 8); -+ len = aligned_dlen1 + aligned_dlen2 + ALIGN(ilen, 8) + ALIGN(plen, 8); -+ if (old_dir != new_dir) -+ len += plen; -+ dent = kmalloc(len, GFP_NOFS); -+ if (!dent) -+ return -ENOMEM; -+ -+ /* Make reservation before allocating sequence numbers */ -+ err = make_reservation(c, BASEHD, len); -+ if (err) -+ goto out_free; -+ -+ /* Make new dent */ -+ dent->ch.node_type = UBIFS_DENT_NODE; -+ dent_key_init_flash(c, &dent->key, new_dir->i_ino, &new_dentry->d_name); -+ dent->inum = cpu_to_le64(old_inode->i_ino); -+ dent->type = get_dent_type(old_inode->i_mode); -+ dent->nlen = cpu_to_le16(new_dentry->d_name.len); -+ memcpy(dent->name, new_dentry->d_name.name, new_dentry->d_name.len); -+ dent->name[new_dentry->d_name.len] = '\0'; -+ zero_dent_node_unused(dent); -+ ubifs_prep_grp_node(c, dent, dlen1, 0); -+ -+ /* Make deletion dent */ -+ dent2 = (void *)dent + aligned_dlen1; -+ dent2->ch.node_type = UBIFS_DENT_NODE; -+ dent_key_init_flash(c, &dent2->key, old_dir->i_ino, -+ &old_dentry->d_name); -+ dent2->inum = 0; -+ dent2->type = DT_UNKNOWN; -+ dent2->nlen = cpu_to_le16(old_dentry->d_name.len); -+ memcpy(dent2->name, old_dentry->d_name.name, old_dentry->d_name.len); -+ dent2->name[old_dentry->d_name.len] = '\0'; -+ zero_dent_node_unused(dent2); -+ ubifs_prep_grp_node(c, dent2, dlen2, 0); -+ -+ p = (void *)dent2 + aligned_dlen2; -+ if (new_inode) { -+ pack_inode(c, p, new_inode, 0); -+ p += ALIGN(ilen, 8); -+ } -+ -+ if (!move) -+ pack_inode(c, p, old_dir, 1); -+ else { -+ pack_inode(c, p, old_dir, 0); 
-+ p += ALIGN(plen, 8); -+ pack_inode(c, p, new_dir, 1); -+ } -+ -+ if (last_reference) { -+ err = ubifs_add_orphan(c, new_inode->i_ino); -+ if (err) { -+ release_head(c, BASEHD); -+ goto out_finish; -+ } -+ new_ui->del_cmtno = c->cmt_no; -+ } -+ -+ err = write_head(c, BASEHD, dent, len, &lnum, &offs, sync); -+ if (err) -+ goto out_release; -+ if (!sync) { -+ struct ubifs_wbuf *wbuf = &c->jheads[BASEHD].wbuf; -+ -+ ubifs_wbuf_add_ino_nolock(wbuf, new_dir->i_ino); -+ ubifs_wbuf_add_ino_nolock(wbuf, old_dir->i_ino); -+ if (new_inode) -+ ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf, -+ new_inode->i_ino); -+ } -+ release_head(c, BASEHD); -+ -+ dent_key_init(c, &key, new_dir->i_ino, &new_dentry->d_name); -+ err = ubifs_tnc_add_nm(c, &key, lnum, offs, dlen1, &new_dentry->d_name); -+ if (err) -+ goto out_ro; -+ -+ err = ubifs_add_dirt(c, lnum, dlen2); -+ if (err) -+ goto out_ro; -+ -+ dent_key_init(c, &key, old_dir->i_ino, &old_dentry->d_name); -+ err = ubifs_tnc_remove_nm(c, &key, &old_dentry->d_name); -+ if (err) -+ goto out_ro; -+ -+ offs += aligned_dlen1 + aligned_dlen2; -+ if (new_inode) { -+ ino_key_init(c, &key, new_inode->i_ino); -+ err = ubifs_tnc_add(c, &key, lnum, offs, ilen); -+ if (err) -+ goto out_ro; -+ offs += ALIGN(ilen, 8); -+ } -+ -+ ino_key_init(c, &key, old_dir->i_ino); -+ err = ubifs_tnc_add(c, &key, lnum, offs, plen); -+ if (err) -+ goto out_ro; -+ -+ if (old_dir != new_dir) { -+ offs += ALIGN(plen, 8); -+ ino_key_init(c, &key, new_dir->i_ino); -+ err = ubifs_tnc_add(c, &key, lnum, offs, plen); -+ if (err) -+ goto out_ro; -+ } -+ -+ finish_reservation(c); -+ if (new_inode) { -+ mark_inode_clean(c, new_ui); -+ spin_lock(&new_ui->ui_lock); -+ new_ui->synced_i_size = new_ui->ui_size; -+ spin_unlock(&new_ui->ui_lock); -+ } -+ mark_inode_clean(c, ubifs_inode(old_dir)); -+ if (move) -+ mark_inode_clean(c, ubifs_inode(new_dir)); -+ kfree(dent); -+ return 0; -+ -+out_release: -+ release_head(c, BASEHD); -+out_ro: -+ ubifs_ro_mode(c, err); -+ if 
(last_reference) -+ ubifs_delete_orphan(c, new_inode->i_ino); -+out_finish: -+ finish_reservation(c); -+out_free: -+ kfree(dent); -+ return err; -+} -+ -+/** -+ * recomp_data_node - re-compress a truncated data node. -+ * @dn: data node to re-compress -+ * @new_len: new length -+ * -+ * This function is used when an inode is truncated and the last data node of -+ * the inode has to be re-compressed and re-written. -+ */ -+static int recomp_data_node(struct ubifs_data_node *dn, int *new_len) -+{ -+ void *buf; -+ int err, len, compr_type, out_len; -+ -+ out_len = le32_to_cpu(dn->size); -+ buf = kmalloc(out_len * WORST_COMPR_FACTOR, GFP_NOFS); -+ if (!buf) -+ return -ENOMEM; -+ -+ len = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ; -+ compr_type = le16_to_cpu(dn->compr_type); -+ err = ubifs_decompress(&dn->data, len, buf, &out_len, compr_type); -+ if (err) -+ goto out; -+ -+ ubifs_compress(buf, *new_len, &dn->data, &out_len, &compr_type); -+ ubifs_assert(out_len <= UBIFS_BLOCK_SIZE); -+ dn->compr_type = cpu_to_le16(compr_type); -+ dn->size = cpu_to_le32(*new_len); -+ *new_len = UBIFS_DATA_NODE_SZ + out_len; -+out: -+ kfree(buf); -+ return err; -+} -+ -+/** -+ * ubifs_jnl_truncate - update the journal for a truncation. -+ * @c: UBIFS file-system description object -+ * @inode: inode to truncate -+ * @old_size: old size -+ * @new_size: new size -+ * -+ * When the size of a file decreases due to truncation, a truncation node is -+ * written, the journal tree is updated, and the last data block is re-written -+ * if it has been affected. The inode is also updated in order to synchronize -+ * the new inode size. -+ * -+ * This function marks the inode as clean and returns zero on success. In case -+ * of failure, a negative error code is returned. 
-+ */ -+int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode, -+ loff_t old_size, loff_t new_size) -+{ -+ union ubifs_key key, to_key; -+ struct ubifs_ino_node *ino; -+ struct ubifs_trun_node *trun; -+ struct ubifs_data_node *uninitialized_var(dn); -+ int err, dlen, len, lnum, offs, bit, sz, sync = IS_SYNC(inode); -+ struct ubifs_inode *ui = ubifs_inode(inode); -+ ino_t inum = inode->i_ino; -+ unsigned int blk; -+ -+ dbg_jnl("ino %lu, size %lld -> %lld", -+ (unsigned long)inum, old_size, new_size); -+ ubifs_assert(!ui->data_len); -+ ubifs_assert(S_ISREG(inode->i_mode)); -+ ubifs_assert(mutex_is_locked(&ui->ui_mutex)); -+ -+ sz = UBIFS_TRUN_NODE_SZ + UBIFS_INO_NODE_SZ + -+ UBIFS_MAX_DATA_NODE_SZ * WORST_COMPR_FACTOR; -+ ino = kmalloc(sz, GFP_NOFS); -+ if (!ino) -+ return -ENOMEM; -+ -+ trun = (void *)ino + UBIFS_INO_NODE_SZ; -+ trun->ch.node_type = UBIFS_TRUN_NODE; -+ trun->inum = cpu_to_le32(inum); -+ trun->old_size = cpu_to_le64(old_size); -+ trun->new_size = cpu_to_le64(new_size); -+ zero_trun_node_unused(trun); -+ -+ dlen = new_size & (UBIFS_BLOCK_SIZE - 1); -+ if (dlen) { -+ /* Get last data block so it can be truncated */ -+ dn = (void *)trun + UBIFS_TRUN_NODE_SZ; -+ blk = new_size >> UBIFS_BLOCK_SHIFT; -+ data_key_init(c, &key, inum, blk); -+ dbg_jnl("last block key %s", DBGKEY(&key)); -+ err = ubifs_tnc_lookup(c, &key, dn); -+ if (err == -ENOENT) -+ dlen = 0; /* Not found (so it is a hole) */ -+ else if (err) -+ goto out_free; -+ else { -+ if (le32_to_cpu(dn->size) <= dlen) -+ dlen = 0; /* Nothing to do */ -+ else { -+ int compr_type = le16_to_cpu(dn->compr_type); -+ -+ if (compr_type != UBIFS_COMPR_NONE) { -+ err = recomp_data_node(dn, &dlen); -+ if (err) -+ goto out_free; -+ } else { -+ dn->size = cpu_to_le32(dlen); -+ dlen += UBIFS_DATA_NODE_SZ; -+ } -+ zero_data_node_unused(dn); -+ } -+ } -+ } -+ -+ /* Must make reservation before allocating sequence numbers */ -+ len = UBIFS_TRUN_NODE_SZ + UBIFS_INO_NODE_SZ; -+ if (dlen) -+ len += 
dlen; -+ err = make_reservation(c, BASEHD, len); -+ if (err) -+ goto out_free; -+ -+ pack_inode(c, ino, inode, 0); -+ ubifs_prep_grp_node(c, trun, UBIFS_TRUN_NODE_SZ, dlen ? 0 : 1); -+ if (dlen) -+ ubifs_prep_grp_node(c, dn, dlen, 1); -+ -+ err = write_head(c, BASEHD, ino, len, &lnum, &offs, sync); -+ if (err) -+ goto out_release; -+ if (!sync) -+ ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf, inum); -+ release_head(c, BASEHD); -+ -+ if (dlen) { -+ sz = offs + UBIFS_INO_NODE_SZ + UBIFS_TRUN_NODE_SZ; -+ err = ubifs_tnc_add(c, &key, lnum, sz, dlen); -+ if (err) -+ goto out_ro; -+ } -+ -+ ino_key_init(c, &key, inum); -+ err = ubifs_tnc_add(c, &key, lnum, offs, UBIFS_INO_NODE_SZ); -+ if (err) -+ goto out_ro; -+ -+ err = ubifs_add_dirt(c, lnum, UBIFS_TRUN_NODE_SZ); -+ if (err) -+ goto out_ro; -+ -+ bit = new_size & (UBIFS_BLOCK_SIZE - 1); -+ blk = (new_size >> UBIFS_BLOCK_SHIFT) + (bit ? 1 : 0); -+ data_key_init(c, &key, inum, blk); -+ -+ bit = old_size & (UBIFS_BLOCK_SIZE - 1); -+ blk = (old_size >> UBIFS_BLOCK_SHIFT) - (bit ? 0 : 1); -+ data_key_init(c, &to_key, inum, blk); -+ -+ err = ubifs_tnc_remove_range(c, &key, &to_key); -+ if (err) -+ goto out_ro; -+ -+ finish_reservation(c); -+ spin_lock(&ui->ui_lock); -+ ui->synced_i_size = ui->ui_size; -+ spin_unlock(&ui->ui_lock); -+ mark_inode_clean(c, ui); -+ kfree(ino); -+ return 0; -+ -+out_release: -+ release_head(c, BASEHD); -+out_ro: -+ ubifs_ro_mode(c, err); -+ finish_reservation(c); -+out_free: -+ kfree(ino); -+ return err; -+} -+ -+#ifdef CONFIG_UBIFS_FS_XATTR -+ -+/** -+ * ubifs_jnl_delete_xattr - delete an extended attribute. -+ * @c: UBIFS file-system description object -+ * @host: host inode -+ * @inode: extended attribute inode -+ * @nm: extended attribute entry name -+ * -+ * This function delete an extended attribute which is very similar to -+ * un-linking regular files - it writes a deletion xentry, a deletion inode and -+ * updates the target inode. 
Returns zero in case of success and a negative -+ * error code in case of failure. -+ */ -+int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host, -+ const struct inode *inode, const struct qstr *nm) -+{ -+ int err, xlen, hlen, len, lnum, xent_offs, aligned_xlen; -+ struct ubifs_dent_node *xent; -+ struct ubifs_ino_node *ino; -+ union ubifs_key xent_key, key1, key2; -+ int sync = IS_DIRSYNC(host); -+ struct ubifs_inode *host_ui = ubifs_inode(host); -+ -+ dbg_jnl("host %lu, xattr ino %lu, name '%s', data len %d", -+ host->i_ino, inode->i_ino, nm->name, -+ ubifs_inode(inode)->data_len); -+ ubifs_assert(inode->i_nlink == 0); -+ ubifs_assert(mutex_is_locked(&host_ui->ui_mutex)); -+ -+ /* -+ * Since we are deleting the inode, we do not bother to attach any data -+ * to it and assume its length is %UBIFS_INO_NODE_SZ. -+ */ -+ xlen = UBIFS_DENT_NODE_SZ + nm->len + 1; -+ aligned_xlen = ALIGN(xlen, 8); -+ hlen = host_ui->data_len + UBIFS_INO_NODE_SZ; -+ len = aligned_xlen + UBIFS_INO_NODE_SZ + ALIGN(hlen, 8); -+ -+ xent = kmalloc(len, GFP_NOFS); -+ if (!xent) -+ return -ENOMEM; -+ -+ /* Make reservation before allocating sequence numbers */ -+ err = make_reservation(c, BASEHD, len); -+ if (err) { -+ kfree(xent); -+ return err; -+ } -+ -+ xent->ch.node_type = UBIFS_XENT_NODE; -+ xent_key_init(c, &xent_key, host->i_ino, nm); -+ key_write(c, &xent_key, xent->key); -+ xent->inum = 0; -+ xent->type = get_dent_type(inode->i_mode); -+ xent->nlen = cpu_to_le16(nm->len); -+ memcpy(xent->name, nm->name, nm->len); -+ xent->name[nm->len] = '\0'; -+ zero_dent_node_unused(xent); -+ ubifs_prep_grp_node(c, xent, xlen, 0); -+ -+ ino = (void *)xent + aligned_xlen; -+ pack_inode(c, ino, inode, 0); -+ ino = (void *)ino + UBIFS_INO_NODE_SZ; -+ pack_inode(c, ino, host, 1); -+ -+ err = write_head(c, BASEHD, xent, len, &lnum, &xent_offs, sync); -+ if (!sync && !err) -+ ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf, host->i_ino); -+ release_head(c, BASEHD); -+ kfree(xent); -+ 
if (err) -+ goto out_ro; -+ -+ /* Remove the extended attribute entry from TNC */ -+ err = ubifs_tnc_remove_nm(c, &xent_key, nm); -+ if (err) -+ goto out_ro; -+ err = ubifs_add_dirt(c, lnum, xlen); -+ if (err) -+ goto out_ro; -+ -+ /* -+ * Remove all nodes belonging to the extended attribute inode from TNC. -+ * Well, there actually must be only one node - the inode itself. -+ */ -+ lowest_ino_key(c, &key1, inode->i_ino); -+ highest_ino_key(c, &key2, inode->i_ino); -+ err = ubifs_tnc_remove_range(c, &key1, &key2); -+ if (err) -+ goto out_ro; -+ err = ubifs_add_dirt(c, lnum, UBIFS_INO_NODE_SZ); -+ if (err) -+ goto out_ro; -+ -+ /* And update TNC with the new host inode position */ -+ ino_key_init(c, &key1, host->i_ino); -+ err = ubifs_tnc_add(c, &key1, lnum, xent_offs + len - hlen, hlen); -+ if (err) -+ goto out_ro; -+ -+ finish_reservation(c); -+ spin_lock(&host_ui->ui_lock); -+ host_ui->synced_i_size = host_ui->ui_size; -+ spin_unlock(&host_ui->ui_lock); -+ mark_inode_clean(c, host_ui); -+ return 0; -+ -+out_ro: -+ ubifs_ro_mode(c, err); -+ finish_reservation(c); -+ return err; -+} -+ -+/** -+ * ubifs_jnl_change_xattr - change an extended attribute. -+ * @c: UBIFS file-system description object -+ * @inode: extended attribute inode -+ * @host: host inode -+ * -+ * This function writes the updated version of an extended attribute inode and -+ * the host inode tho the journal (to the base head). The host inode is written -+ * after the extended attribute inode in order to guarantee that the extended -+ * attribute will be flushed when the inode is synchronized by 'fsync()' and -+ * consequently, the write-buffer is synchronized. This function returns zero -+ * in case of success and a negative error code in case of failure. 
-+ */ -+int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode, -+ const struct inode *host) -+{ -+ int err, len1, len2, aligned_len, aligned_len1, lnum, offs; -+ struct ubifs_inode *host_ui = ubifs_inode(host); -+ struct ubifs_ino_node *ino; -+ union ubifs_key key; -+ int sync = IS_DIRSYNC(host); -+ -+ dbg_jnl("ino %lu, ino %lu", host->i_ino, inode->i_ino); -+ ubifs_assert(host->i_nlink > 0); -+ ubifs_assert(inode->i_nlink > 0); -+ ubifs_assert(mutex_is_locked(&host_ui->ui_mutex)); -+ -+ len1 = UBIFS_INO_NODE_SZ + host_ui->data_len; -+ len2 = UBIFS_INO_NODE_SZ + ubifs_inode(inode)->data_len; -+ aligned_len1 = ALIGN(len1, 8); -+ aligned_len = aligned_len1 + ALIGN(len2, 8); -+ -+ ino = kmalloc(aligned_len, GFP_NOFS); -+ if (!ino) -+ return -ENOMEM; -+ -+ /* Make reservation before allocating sequence numbers */ -+ err = make_reservation(c, BASEHD, aligned_len); -+ if (err) -+ goto out_free; -+ -+ pack_inode(c, ino, host, 0); -+ pack_inode(c, (void *)ino + aligned_len1, inode, 1); -+ -+ err = write_head(c, BASEHD, ino, aligned_len, &lnum, &offs, 0); -+ if (!sync && !err) { -+ struct ubifs_wbuf *wbuf = &c->jheads[BASEHD].wbuf; -+ -+ ubifs_wbuf_add_ino_nolock(wbuf, host->i_ino); -+ ubifs_wbuf_add_ino_nolock(wbuf, inode->i_ino); -+ } -+ release_head(c, BASEHD); -+ if (err) -+ goto out_ro; -+ -+ ino_key_init(c, &key, host->i_ino); -+ err = ubifs_tnc_add(c, &key, lnum, offs, len1); -+ if (err) -+ goto out_ro; -+ -+ ino_key_init(c, &key, inode->i_ino); -+ err = ubifs_tnc_add(c, &key, lnum, offs + aligned_len1, len2); -+ if (err) -+ goto out_ro; -+ -+ finish_reservation(c); -+ spin_lock(&host_ui->ui_lock); -+ host_ui->synced_i_size = host_ui->ui_size; -+ spin_unlock(&host_ui->ui_lock); -+ mark_inode_clean(c, host_ui); -+ kfree(ino); -+ return 0; -+ -+out_ro: -+ ubifs_ro_mode(c, err); -+ finish_reservation(c); -+out_free: -+ kfree(ino); -+ return err; -+} -+ -+#endif /* CONFIG_UBIFS_FS_XATTR */ -diff -Nurd linux-2.6.24.orig/fs/ubifs/key.h 
linux-2.6.24/fs/ubifs/key.h ---- linux-2.6.24.orig/fs/ubifs/key.h 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.24/fs/ubifs/key.h 2009-04-17 09:49:28.000000000 +0200 -@@ -0,0 +1,557 @@ -+/* -+ * This file is part of UBIFS. -+ * -+ * Copyright (C) 2006-2008 Nokia Corporation. -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 as published by -+ * the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -+ * more details. -+ * -+ * You should have received a copy of the GNU General Public License along with -+ * this program; if not, write to the Free Software Foundation, Inc., 51 -+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ * -+ * Authors: Artem Bityutskiy (Битюцкий Артём) -+ * Adrian Hunter -+ */ -+ -+/* -+ * This header contains various key-related definitions and helper function. -+ * UBIFS allows several key schemes, so we access key fields only via these -+ * helpers. At the moment only one key scheme is supported. -+ * -+ * Simple key scheme -+ * ~~~~~~~~~~~~~~~~~ -+ * -+ * Keys are 64-bits long. First 32-bits are inode number (parent inode number -+ * in case of direntry key). Next 3 bits are node type. The last 29 bits are -+ * 4KiB offset in case of inode node, and direntry hash in case of a direntry -+ * node. We use "r5" hash borrowed from reiserfs. -+ */ -+ -+#ifndef __UBIFS_KEY_H__ -+#define __UBIFS_KEY_H__ -+ -+/** -+ * key_mask_hash - mask a valid hash value. -+ * @val: value to be masked -+ * -+ * We use hash values as offset in directories, so values %0 and %1 are -+ * reserved for "." and "..". %2 is reserved for "end of readdir" marker. This -+ * function makes sure the reserved values are not used. 
-+ */ -+static inline uint32_t key_mask_hash(uint32_t hash) -+{ -+ hash &= UBIFS_S_KEY_HASH_MASK; -+ if (unlikely(hash <= 2)) -+ hash += 3; -+ return hash; -+} -+ -+/** -+ * key_r5_hash - R5 hash function (borrowed from reiserfs). -+ * @s: direntry name -+ * @len: name length -+ */ -+static inline uint32_t key_r5_hash(const char *s, int len) -+{ -+ uint32_t a = 0; -+ const signed char *str = (const signed char *)s; -+ -+ while (*str) { -+ a += *str << 4; -+ a += *str >> 4; -+ a *= 11; -+ str++; -+ } -+ -+ return key_mask_hash(a); -+} -+ -+/** -+ * key_test_hash - testing hash function. -+ * @str: direntry name -+ * @len: name length -+ */ -+static inline uint32_t key_test_hash(const char *str, int len) -+{ -+ uint32_t a = 0; -+ -+ len = min_t(uint32_t, len, 4); -+ memcpy(&a, str, len); -+ return key_mask_hash(a); -+} -+ -+/** -+ * ino_key_init - initialize inode key. -+ * @c: UBIFS file-system description object -+ * @key: key to initialize -+ * @inum: inode number -+ */ -+static inline void ino_key_init(const struct ubifs_info *c, -+ union ubifs_key *key, ino_t inum) -+{ -+ key->u32[0] = inum; -+ key->u32[1] = UBIFS_INO_KEY << UBIFS_S_KEY_BLOCK_BITS; -+} -+ -+/** -+ * ino_key_init_flash - initialize on-flash inode key. -+ * @c: UBIFS file-system description object -+ * @k: key to initialize -+ * @inum: inode number -+ */ -+static inline void ino_key_init_flash(const struct ubifs_info *c, void *k, -+ ino_t inum) -+{ -+ union ubifs_key *key = k; -+ -+ key->j32[0] = cpu_to_le32(inum); -+ key->j32[1] = cpu_to_le32(UBIFS_INO_KEY << UBIFS_S_KEY_BLOCK_BITS); -+ memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8); -+} -+ -+/** -+ * lowest_ino_key - get the lowest possible inode key. 
-+ * @c: UBIFS file-system description object -+ * @key: key to initialize -+ * @inum: inode number -+ */ -+static inline void lowest_ino_key(const struct ubifs_info *c, -+ union ubifs_key *key, ino_t inum) -+{ -+ key->u32[0] = inum; -+ key->u32[1] = 0; -+} -+ -+/** -+ * highest_ino_key - get the highest possible inode key. -+ * @c: UBIFS file-system description object -+ * @key: key to initialize -+ * @inum: inode number -+ */ -+static inline void highest_ino_key(const struct ubifs_info *c, -+ union ubifs_key *key, ino_t inum) -+{ -+ key->u32[0] = inum; -+ key->u32[1] = 0xffffffff; -+} -+ -+/** -+ * dent_key_init - initialize directory entry key. -+ * @c: UBIFS file-system description object -+ * @key: key to initialize -+ * @inum: parent inode number -+ * @nm: direntry name and length -+ */ -+static inline void dent_key_init(const struct ubifs_info *c, -+ union ubifs_key *key, ino_t inum, -+ const struct qstr *nm) -+{ -+ uint32_t hash = c->key_hash(nm->name, nm->len); -+ -+ ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); -+ key->u32[0] = inum; -+ key->u32[1] = hash | (UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS); -+} -+ -+/** -+ * dent_key_init_hash - initialize directory entry key without re-calculating -+ * hash function. -+ * @c: UBIFS file-system description object -+ * @key: key to initialize -+ * @inum: parent inode number -+ * @hash: direntry name hash -+ */ -+static inline void dent_key_init_hash(const struct ubifs_info *c, -+ union ubifs_key *key, ino_t inum, -+ uint32_t hash) -+{ -+ ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); -+ key->u32[0] = inum; -+ key->u32[1] = hash | (UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS); -+} -+ -+/** -+ * dent_key_init_flash - initialize on-flash directory entry key. 
-+ * @c: UBIFS file-system description object -+ * @k: key to initialize -+ * @inum: parent inode number -+ * @nm: direntry name and length -+ */ -+static inline void dent_key_init_flash(const struct ubifs_info *c, void *k, -+ ino_t inum, const struct qstr *nm) -+{ -+ union ubifs_key *key = k; -+ uint32_t hash = c->key_hash(nm->name, nm->len); -+ -+ ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); -+ key->j32[0] = cpu_to_le32(inum); -+ key->j32[1] = cpu_to_le32(hash | -+ (UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS)); -+ memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8); -+} -+ -+/** -+ * lowest_dent_key - get the lowest possible directory entry key. -+ * @c: UBIFS file-system description object -+ * @key: where to store the lowest key -+ * @inum: parent inode number -+ */ -+static inline void lowest_dent_key(const struct ubifs_info *c, -+ union ubifs_key *key, ino_t inum) -+{ -+ key->u32[0] = inum; -+ key->u32[1] = UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS; -+} -+ -+/** -+ * xent_key_init - initialize extended attribute entry key. -+ * @c: UBIFS file-system description object -+ * @key: key to initialize -+ * @inum: host inode number -+ * @nm: extended attribute entry name and length -+ */ -+static inline void xent_key_init(const struct ubifs_info *c, -+ union ubifs_key *key, ino_t inum, -+ const struct qstr *nm) -+{ -+ uint32_t hash = c->key_hash(nm->name, nm->len); -+ -+ ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); -+ key->u32[0] = inum; -+ key->u32[1] = hash | (UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS); -+} -+ -+/** -+ * xent_key_init_hash - initialize extended attribute entry key without -+ * re-calculating hash function. 
-+ * @c: UBIFS file-system description object -+ * @key: key to initialize -+ * @inum: host inode number -+ * @hash: extended attribute entry name hash -+ */ -+static inline void xent_key_init_hash(const struct ubifs_info *c, -+ union ubifs_key *key, ino_t inum, -+ uint32_t hash) -+{ -+ ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); -+ key->u32[0] = inum; -+ key->u32[1] = hash | (UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS); -+} -+ -+/** -+ * xent_key_init_flash - initialize on-flash extended attribute entry key. -+ * @c: UBIFS file-system description object -+ * @k: key to initialize -+ * @inum: host inode number -+ * @nm: extended attribute entry name and length -+ */ -+static inline void xent_key_init_flash(const struct ubifs_info *c, void *k, -+ ino_t inum, const struct qstr *nm) -+{ -+ union ubifs_key *key = k; -+ uint32_t hash = c->key_hash(nm->name, nm->len); -+ -+ ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); -+ key->j32[0] = cpu_to_le32(inum); -+ key->j32[1] = cpu_to_le32(hash | -+ (UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS)); -+ memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8); -+} -+ -+/** -+ * lowest_xent_key - get the lowest possible extended attribute entry key. -+ * @c: UBIFS file-system description object -+ * @key: where to store the lowest key -+ * @inum: host inode number -+ */ -+static inline void lowest_xent_key(const struct ubifs_info *c, -+ union ubifs_key *key, ino_t inum) -+{ -+ key->u32[0] = inum; -+ key->u32[1] = UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS; -+} -+ -+/** -+ * data_key_init - initialize data key. 
-+ * @c: UBIFS file-system description object -+ * @key: key to initialize -+ * @inum: inode number -+ * @block: block number -+ */ -+static inline void data_key_init(const struct ubifs_info *c, -+ union ubifs_key *key, ino_t inum, -+ unsigned int block) -+{ -+ ubifs_assert(!(block & ~UBIFS_S_KEY_BLOCK_MASK)); -+ key->u32[0] = inum; -+ key->u32[1] = block | (UBIFS_DATA_KEY << UBIFS_S_KEY_BLOCK_BITS); -+} -+ -+/** -+ * data_key_init_flash - initialize on-flash data key. -+ * @c: UBIFS file-system description object -+ * @k: key to initialize -+ * @inum: inode number -+ * @block: block number -+ */ -+static inline void data_key_init_flash(const struct ubifs_info *c, void *k, -+ ino_t inum, unsigned int block) -+{ -+ union ubifs_key *key = k; -+ -+ ubifs_assert(!(block & ~UBIFS_S_KEY_BLOCK_MASK)); -+ key->j32[0] = cpu_to_le32(inum); -+ key->j32[1] = cpu_to_le32(block | -+ (UBIFS_DATA_KEY << UBIFS_S_KEY_BLOCK_BITS)); -+ memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8); -+} -+ -+/** -+ * trun_key_init - initialize truncation node key. -+ * @c: UBIFS file-system description object -+ * @key: key to initialize -+ * @inum: inode number -+ * -+ * Note, UBIFS does not have truncation keys on the media and this function is -+ * only used for purposes of replay. -+ */ -+static inline void trun_key_init(const struct ubifs_info *c, -+ union ubifs_key *key, ino_t inum) -+{ -+ key->u32[0] = inum; -+ key->u32[1] = UBIFS_TRUN_KEY << UBIFS_S_KEY_BLOCK_BITS; -+} -+ -+/** -+ * key_type - get key type. -+ * @c: UBIFS file-system description object -+ * @key: key to get type of -+ */ -+static inline int key_type(const struct ubifs_info *c, -+ const union ubifs_key *key) -+{ -+ return key->u32[1] >> UBIFS_S_KEY_BLOCK_BITS; -+} -+ -+/** -+ * key_type_flash - get type of a on-flash formatted key. 
-+ * @c: UBIFS file-system description object -+ * @k: key to get type of -+ */ -+static inline int key_type_flash(const struct ubifs_info *c, const void *k) -+{ -+ const union ubifs_key *key = k; -+ -+ return le32_to_cpu(key->j32[1]) >> UBIFS_S_KEY_BLOCK_BITS; -+} -+ -+/** -+ * key_inum - fetch inode number from key. -+ * @c: UBIFS file-system description object -+ * @k: key to fetch inode number from -+ */ -+static inline ino_t key_inum(const struct ubifs_info *c, const void *k) -+{ -+ const union ubifs_key *key = k; -+ -+ return key->u32[0]; -+} -+ -+/** -+ * key_inum_flash - fetch inode number from an on-flash formatted key. -+ * @c: UBIFS file-system description object -+ * @k: key to fetch inode number from -+ */ -+static inline ino_t key_inum_flash(const struct ubifs_info *c, const void *k) -+{ -+ const union ubifs_key *key = k; -+ -+ return le32_to_cpu(key->j32[0]); -+} -+ -+/** -+ * key_hash - get directory entry hash. -+ * @c: UBIFS file-system description object -+ * @key: the key to get hash from -+ */ -+static inline int key_hash(const struct ubifs_info *c, -+ const union ubifs_key *key) -+{ -+ return key->u32[1] & UBIFS_S_KEY_HASH_MASK; -+} -+ -+/** -+ * key_hash_flash - get directory entry hash from an on-flash formatted key. -+ * @c: UBIFS file-system description object -+ * @k: the key to get hash from -+ */ -+static inline int key_hash_flash(const struct ubifs_info *c, const void *k) -+{ -+ const union ubifs_key *key = k; -+ -+ return le32_to_cpu(key->j32[1]) & UBIFS_S_KEY_HASH_MASK; -+} -+ -+/** -+ * key_block - get data block number. -+ * @c: UBIFS file-system description object -+ * @key: the key to get the block number from -+ */ -+static inline unsigned int key_block(const struct ubifs_info *c, -+ const union ubifs_key *key) -+{ -+ return key->u32[1] & UBIFS_S_KEY_BLOCK_MASK; -+} -+ -+/** -+ * key_block_flash - get data block number from an on-flash formatted key. 
-+ * @c: UBIFS file-system description object -+ * @k: the key to get the block number from -+ */ -+static inline unsigned int key_block_flash(const struct ubifs_info *c, -+ const void *k) -+{ -+ const union ubifs_key *key = k; -+ -+ return le32_to_cpu(key->j32[1]) & UBIFS_S_KEY_BLOCK_MASK; -+} -+ -+/** -+ * key_read - transform a key to in-memory format. -+ * @c: UBIFS file-system description object -+ * @from: the key to transform -+ * @to: the key to store the result -+ */ -+static inline void key_read(const struct ubifs_info *c, const void *from, -+ union ubifs_key *to) -+{ -+ const union ubifs_key *f = from; -+ -+ to->u32[0] = le32_to_cpu(f->j32[0]); -+ to->u32[1] = le32_to_cpu(f->j32[1]); -+} -+ -+/** -+ * key_write - transform a key from in-memory format. -+ * @c: UBIFS file-system description object -+ * @from: the key to transform -+ * @to: the key to store the result -+ */ -+static inline void key_write(const struct ubifs_info *c, -+ const union ubifs_key *from, void *to) -+{ -+ union ubifs_key *t = to; -+ -+ t->j32[0] = cpu_to_le32(from->u32[0]); -+ t->j32[1] = cpu_to_le32(from->u32[1]); -+ memset(to + 8, 0, UBIFS_MAX_KEY_LEN - 8); -+} -+ -+/** -+ * key_write_idx - transform a key from in-memory format for the index. -+ * @c: UBIFS file-system description object -+ * @from: the key to transform -+ * @to: the key to store the result -+ */ -+static inline void key_write_idx(const struct ubifs_info *c, -+ const union ubifs_key *from, void *to) -+{ -+ union ubifs_key *t = to; -+ -+ t->j32[0] = cpu_to_le32(from->u32[0]); -+ t->j32[1] = cpu_to_le32(from->u32[1]); -+} -+ -+/** -+ * key_copy - copy a key. -+ * @c: UBIFS file-system description object -+ * @from: the key to copy from -+ * @to: the key to copy to -+ */ -+static inline void key_copy(const struct ubifs_info *c, -+ const union ubifs_key *from, union ubifs_key *to) -+{ -+ to->u64[0] = from->u64[0]; -+} -+ -+/** -+ * keys_cmp - compare keys. 
-+ * @c: UBIFS file-system description object -+ * @key1: the first key to compare -+ * @key2: the second key to compare -+ * -+ * This function compares 2 keys and returns %-1 if @key1 is less than -+ * @key2, %0 if the keys are equivalent and %1 if @key1 is greater than @key2. -+ */ -+static inline int keys_cmp(const struct ubifs_info *c, -+ const union ubifs_key *key1, -+ const union ubifs_key *key2) -+{ -+ if (key1->u32[0] < key2->u32[0]) -+ return -1; -+ if (key1->u32[0] > key2->u32[0]) -+ return 1; -+ if (key1->u32[1] < key2->u32[1]) -+ return -1; -+ if (key1->u32[1] > key2->u32[1]) -+ return 1; -+ -+ return 0; -+} -+ -+/** -+ * keys_eq - determine if keys are equivalent. -+ * @c: UBIFS file-system description object -+ * @key1: the first key to compare -+ * @key2: the second key to compare -+ * -+ * This function compares 2 keys and returns %1 if @key1 is equal to @key2 and -+ * %0 if not. -+ */ -+static inline int keys_eq(const struct ubifs_info *c, -+ const union ubifs_key *key1, -+ const union ubifs_key *key2) -+{ -+ if (key1->u32[0] != key2->u32[0]) -+ return 0; -+ if (key1->u32[1] != key2->u32[1]) -+ return 0; -+ return 1; -+} -+ -+/** -+ * is_hash_key - is a key vulnerable to hash collisions. -+ * @c: UBIFS file-system description object -+ * @key: key -+ * -+ * This function returns %1 if @key is a hashed key or %0 otherwise. -+ */ -+static inline int is_hash_key(const struct ubifs_info *c, -+ const union ubifs_key *key) -+{ -+ int type = key_type(c, key); -+ -+ return type == UBIFS_DENT_KEY || type == UBIFS_XENT_KEY; -+} -+ -+/** -+ * key_max_inode_size - get maximum file size allowed by current key format. 
-+ * @c: UBIFS file-system description object -+ */ -+static inline unsigned long long key_max_inode_size(const struct ubifs_info *c) -+{ -+ switch (c->key_fmt) { -+ case UBIFS_SIMPLE_KEY_FMT: -+ return (1ULL << UBIFS_S_KEY_BLOCK_BITS) * UBIFS_BLOCK_SIZE; -+ default: -+ return 0; -+ } -+} -+#endif /* !__UBIFS_KEY_H__ */ -diff -Nurd linux-2.6.24.orig/fs/ubifs/log.c linux-2.6.24/fs/ubifs/log.c ---- linux-2.6.24.orig/fs/ubifs/log.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.24/fs/ubifs/log.c 2009-04-17 09:49:28.000000000 +0200 -@@ -0,0 +1,807 @@ -+/* -+ * This file is part of UBIFS. -+ * -+ * Copyright (C) 2006-2008 Nokia Corporation. -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 as published by -+ * the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -+ * more details. -+ * -+ * You should have received a copy of the GNU General Public License along with -+ * this program; if not, write to the Free Software Foundation, Inc., 51 -+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ * -+ * Authors: Artem Bityutskiy (Битюцкий Артём) -+ * Adrian Hunter -+ */ -+ -+/* -+ * This file is a part of UBIFS journal implementation and contains various -+ * functions which manipulate the log. The log is a fixed area on the flash -+ * which does not contain any data but refers to buds. The log is a part of the -+ * journal. -+ */ -+ -+#include "ubifs.h" -+ -+#ifdef CONFIG_UBIFS_FS_DEBUG -+static int dbg_check_bud_bytes(struct ubifs_info *c); -+#else -+#define dbg_check_bud_bytes(c) 0 -+#endif -+ -+/** -+ * ubifs_search_bud - search bud LEB. 
-+ * @c: UBIFS file-system description object -+ * @lnum: logical eraseblock number to search -+ * -+ * This function searches bud LEB @lnum. Returns bud description object in case -+ * of success and %NULL if there is no bud with this LEB number. -+ */ -+struct ubifs_bud *ubifs_search_bud(struct ubifs_info *c, int lnum) -+{ -+ struct rb_node *p; -+ struct ubifs_bud *bud; -+ -+ spin_lock(&c->buds_lock); -+ p = c->buds.rb_node; -+ while (p) { -+ bud = rb_entry(p, struct ubifs_bud, rb); -+ if (lnum < bud->lnum) -+ p = p->rb_left; -+ else if (lnum > bud->lnum) -+ p = p->rb_right; -+ else { -+ spin_unlock(&c->buds_lock); -+ return bud; -+ } -+ } -+ spin_unlock(&c->buds_lock); -+ return NULL; -+} -+ -+/** -+ * ubifs_get_wbuf - get the wbuf associated with a LEB, if there is one. -+ * @c: UBIFS file-system description object -+ * @lnum: logical eraseblock number to search -+ * -+ * This functions returns the wbuf for @lnum or %NULL if there is not one. -+ */ -+struct ubifs_wbuf *ubifs_get_wbuf(struct ubifs_info *c, int lnum) -+{ -+ struct rb_node *p; -+ struct ubifs_bud *bud; -+ int jhead; -+ -+ if (!c->jheads) -+ return NULL; -+ -+ spin_lock(&c->buds_lock); -+ p = c->buds.rb_node; -+ while (p) { -+ bud = rb_entry(p, struct ubifs_bud, rb); -+ if (lnum < bud->lnum) -+ p = p->rb_left; -+ else if (lnum > bud->lnum) -+ p = p->rb_right; -+ else { -+ jhead = bud->jhead; -+ spin_unlock(&c->buds_lock); -+ return &c->jheads[jhead].wbuf; -+ } -+ } -+ spin_unlock(&c->buds_lock); -+ return NULL; -+} -+ -+/** -+ * next_log_lnum - switch to the next log LEB. -+ * @c: UBIFS file-system description object -+ * @lnum: current log LEB -+ */ -+static inline int next_log_lnum(const struct ubifs_info *c, int lnum) -+{ -+ lnum += 1; -+ if (lnum > c->log_last) -+ lnum = UBIFS_LOG_LNUM; -+ -+ return lnum; -+} -+ -+/** -+ * empty_log_bytes - calculate amount of empty space in the log. 
-+ * @c: UBIFS file-system description object -+ */ -+static inline long long empty_log_bytes(const struct ubifs_info *c) -+{ -+ long long h, t; -+ -+ h = (long long)c->lhead_lnum * c->leb_size + c->lhead_offs; -+ t = (long long)c->ltail_lnum * c->leb_size; -+ -+ if (h >= t) -+ return c->log_bytes - h + t; -+ else -+ return t - h; -+} -+ -+/** -+ * ubifs_add_bud - add bud LEB to the tree of buds and its journal head list. -+ * @c: UBIFS file-system description object -+ * @bud: the bud to add -+ */ -+void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud) -+{ -+ struct rb_node **p, *parent = NULL; -+ struct ubifs_bud *b; -+ struct ubifs_jhead *jhead; -+ -+ spin_lock(&c->buds_lock); -+ p = &c->buds.rb_node; -+ while (*p) { -+ parent = *p; -+ b = rb_entry(parent, struct ubifs_bud, rb); -+ ubifs_assert(bud->lnum != b->lnum); -+ if (bud->lnum < b->lnum) -+ p = &(*p)->rb_left; -+ else -+ p = &(*p)->rb_right; -+ } -+ -+ rb_link_node(&bud->rb, parent, p); -+ rb_insert_color(&bud->rb, &c->buds); -+ if (c->jheads) { -+ jhead = &c->jheads[bud->jhead]; -+ list_add_tail(&bud->list, &jhead->buds_list); -+ } else -+ ubifs_assert(c->replaying && (c->vfs_sb->s_flags & MS_RDONLY)); -+ -+ /* -+ * Note, although this is a new bud, we anyway account this space now, -+ * before any data has been written to it, because this is about to -+ * guarantee fixed mount time, and this bud will anyway be read and -+ * scanned. -+ */ -+ c->bud_bytes += c->leb_size - bud->start; -+ -+ dbg_log("LEB %d:%d, jhead %d, bud_bytes %lld", bud->lnum, -+ bud->start, bud->jhead, c->bud_bytes); -+ spin_unlock(&c->buds_lock); -+} -+ -+/** -+ * ubifs_create_buds_lists - create journal head buds lists for remount rw. 
-+ * @c: UBIFS file-system description object -+ */ -+void ubifs_create_buds_lists(struct ubifs_info *c) -+{ -+ struct rb_node *p; -+ -+ spin_lock(&c->buds_lock); -+ p = rb_first(&c->buds); -+ while (p) { -+ struct ubifs_bud *bud = rb_entry(p, struct ubifs_bud, rb); -+ struct ubifs_jhead *jhead = &c->jheads[bud->jhead]; -+ -+ list_add_tail(&bud->list, &jhead->buds_list); -+ p = rb_next(p); -+ } -+ spin_unlock(&c->buds_lock); -+} -+ -+/** -+ * ubifs_add_bud_to_log - add a new bud to the log. -+ * @c: UBIFS file-system description object -+ * @jhead: journal head the bud belongs to -+ * @lnum: LEB number of the bud -+ * @offs: starting offset of the bud -+ * -+ * This function writes reference node for the new bud LEB @lnum it to the log, -+ * and adds it to the buds tress. It also makes sure that log size does not -+ * exceed the 'c->max_bud_bytes' limit. Returns zero in case of success, -+ * %-EAGAIN if commit is required, and a negative error codes in case of -+ * failure. -+ */ -+int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs) -+{ -+ int err; -+ struct ubifs_bud *bud; -+ struct ubifs_ref_node *ref; -+ -+ bud = kmalloc(sizeof(struct ubifs_bud), GFP_NOFS); -+ if (!bud) -+ return -ENOMEM; -+ ref = kzalloc(c->ref_node_alsz, GFP_NOFS); -+ if (!ref) { -+ kfree(bud); -+ return -ENOMEM; -+ } -+ -+ mutex_lock(&c->log_mutex); -+ -+ if (c->ro_media) { -+ err = -EROFS; -+ goto out_unlock; -+ } -+ -+ /* Make sure we have enough space in the log */ -+ if (empty_log_bytes(c) - c->ref_node_alsz < c->min_log_bytes) { -+ dbg_log("not enough log space - %lld, required %d", -+ empty_log_bytes(c), c->min_log_bytes); -+ ubifs_commit_required(c); -+ err = -EAGAIN; -+ goto out_unlock; -+ } -+ -+ /* -+ * Make sure the the amount of space in buds will not exceed -+ * 'c->max_bud_bytes' limit, because we want to guarantee mount time -+ * limits. 
-+ * -+ * It is not necessary to hold @c->buds_lock when reading @c->bud_bytes -+ * because we are holding @c->log_mutex. All @c->bud_bytes take place -+ * when both @c->log_mutex and @c->bud_bytes are locked. -+ */ -+ if (c->bud_bytes + c->leb_size - offs > c->max_bud_bytes) { -+ dbg_log("bud bytes %lld (%lld max), require commit", -+ c->bud_bytes, c->max_bud_bytes); -+ ubifs_commit_required(c); -+ err = -EAGAIN; -+ goto out_unlock; -+ } -+ -+ /* -+ * If the journal is full enough - start background commit. Note, it is -+ * OK to read 'c->cmt_state' without spinlock because integer reads -+ * are atomic in the kernel. -+ */ -+ if (c->bud_bytes >= c->bg_bud_bytes && -+ c->cmt_state == COMMIT_RESTING) { -+ dbg_log("bud bytes %lld (%lld max), initiate BG commit", -+ c->bud_bytes, c->max_bud_bytes); -+ ubifs_request_bg_commit(c); -+ } -+ -+ bud->lnum = lnum; -+ bud->start = offs; -+ bud->jhead = jhead; -+ -+ ref->ch.node_type = UBIFS_REF_NODE; -+ ref->lnum = cpu_to_le32(bud->lnum); -+ ref->offs = cpu_to_le32(bud->start); -+ ref->jhead = cpu_to_le32(jhead); -+ -+ if (c->lhead_offs > c->leb_size - c->ref_node_alsz) { -+ c->lhead_lnum = next_log_lnum(c, c->lhead_lnum); -+ c->lhead_offs = 0; -+ } -+ -+ if (c->lhead_offs == 0) { -+ /* Must ensure next log LEB has been unmapped */ -+ err = ubifs_leb_unmap(c, c->lhead_lnum); -+ if (err) -+ goto out_unlock; -+ } -+ -+ if (bud->start == 0) { -+ /* -+ * Before writing the LEB reference which refers an empty LEB -+ * to the log, we have to make sure it is mapped, because -+ * otherwise we'd risk to refer an LEB with garbage in case of -+ * an unclean reboot, because the target LEB might have been -+ * unmapped, but not yet physically erased. 
-+ */ -+ err = ubi_leb_map(c->ubi, bud->lnum, UBI_SHORTTERM); -+ if (err) -+ goto out_unlock; -+ } -+ -+ dbg_log("write ref LEB %d:%d", -+ c->lhead_lnum, c->lhead_offs); -+ err = ubifs_write_node(c, ref, UBIFS_REF_NODE_SZ, c->lhead_lnum, -+ c->lhead_offs, UBI_SHORTTERM); -+ if (err) -+ goto out_unlock; -+ -+ c->lhead_offs += c->ref_node_alsz; -+ -+ ubifs_add_bud(c, bud); -+ -+ mutex_unlock(&c->log_mutex); -+ kfree(ref); -+ return 0; -+ -+out_unlock: -+ if (err != -EAGAIN) -+ ubifs_ro_mode(c, err); -+ mutex_unlock(&c->log_mutex); -+ kfree(ref); -+ kfree(bud); -+ return err; -+} -+ -+/** -+ * remove_buds - remove used buds. -+ * @c: UBIFS file-system description object -+ * -+ * This function removes use buds from the buds tree. It does not remove the -+ * buds which are pointed to by journal heads. -+ */ -+static void remove_buds(struct ubifs_info *c) -+{ -+ struct rb_node *p; -+ -+ ubifs_assert(list_empty(&c->old_buds)); -+ c->cmt_bud_bytes = 0; -+ spin_lock(&c->buds_lock); -+ p = rb_first(&c->buds); -+ while (p) { -+ struct rb_node *p1 = p; -+ struct ubifs_bud *bud; -+ struct ubifs_wbuf *wbuf; -+ -+ p = rb_next(p); -+ bud = rb_entry(p1, struct ubifs_bud, rb); -+ wbuf = &c->jheads[bud->jhead].wbuf; -+ -+ if (wbuf->lnum == bud->lnum) { -+ /* -+ * Do not remove buds which are pointed to by journal -+ * heads (non-closed buds). 
-+ */ -+ c->cmt_bud_bytes += wbuf->offs - bud->start; -+ dbg_log("preserve %d:%d, jhead %d, bud bytes %d, " -+ "cmt_bud_bytes %lld", bud->lnum, bud->start, -+ bud->jhead, wbuf->offs - bud->start, -+ c->cmt_bud_bytes); -+ bud->start = wbuf->offs; -+ } else { -+ c->cmt_bud_bytes += c->leb_size - bud->start; -+ dbg_log("remove %d:%d, jhead %d, bud bytes %d, " -+ "cmt_bud_bytes %lld", bud->lnum, bud->start, -+ bud->jhead, c->leb_size - bud->start, -+ c->cmt_bud_bytes); -+ rb_erase(p1, &c->buds); -+ list_del(&bud->list); -+ /* -+ * If the commit does not finish, the recovery will need -+ * to replay the journal, in which case the old buds -+ * must be unchanged. Do not release them until post -+ * commit i.e. do not allow them to be garbage -+ * collected. -+ */ -+ list_add(&bud->list, &c->old_buds); -+ } -+ } -+ spin_unlock(&c->buds_lock); -+} -+ -+/** -+ * ubifs_log_start_commit - start commit. -+ * @c: UBIFS file-system description object -+ * @ltail_lnum: return new log tail LEB number -+ * -+ * The commit operation starts with writing "commit start" node to the log and -+ * reference nodes for all journal heads which will define new journal after -+ * the commit has been finished. The commit start and reference nodes are -+ * written in one go to the nearest empty log LEB (hence, when commit is -+ * finished UBIFS may safely unmap all the previous log LEBs). This function -+ * returns zero in case of success and a negative error code in case of -+ * failure. 
-+ */ -+int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum) -+{ -+ void *buf; -+ struct ubifs_cs_node *cs; -+ struct ubifs_ref_node *ref; -+ int err, i, max_len, len; -+ -+ err = dbg_check_bud_bytes(c); -+ if (err) -+ return err; -+ -+ max_len = UBIFS_CS_NODE_SZ + c->jhead_cnt * UBIFS_REF_NODE_SZ; -+ max_len = ALIGN(max_len, c->min_io_size); -+ buf = cs = kmalloc(max_len, GFP_NOFS); -+ if (!buf) -+ return -ENOMEM; -+ -+ cs->ch.node_type = UBIFS_CS_NODE; -+ cs->cmt_no = cpu_to_le64(c->cmt_no); -+ ubifs_prepare_node(c, cs, UBIFS_CS_NODE_SZ, 0); -+ -+ /* -+ * Note, we do not lock 'c->log_mutex' because this is the commit start -+ * phase and we are exclusively using the log. And we do not lock -+ * write-buffer because nobody can write to the file-system at this -+ * phase. -+ */ -+ -+ len = UBIFS_CS_NODE_SZ; -+ for (i = 0; i < c->jhead_cnt; i++) { -+ int lnum = c->jheads[i].wbuf.lnum; -+ int offs = c->jheads[i].wbuf.offs; -+ -+ if (lnum == -1 || offs == c->leb_size) -+ continue; -+ -+ dbg_log("add ref to LEB %d:%d for jhead %d", lnum, offs, i); -+ ref = buf + len; -+ ref->ch.node_type = UBIFS_REF_NODE; -+ ref->lnum = cpu_to_le32(lnum); -+ ref->offs = cpu_to_le32(offs); -+ ref->jhead = cpu_to_le32(i); -+ -+ ubifs_prepare_node(c, ref, UBIFS_REF_NODE_SZ, 0); -+ len += UBIFS_REF_NODE_SZ; -+ } -+ -+ ubifs_pad(c, buf + len, ALIGN(len, c->min_io_size) - len); -+ -+ /* Switch to the next log LEB */ -+ if (c->lhead_offs) { -+ c->lhead_lnum = next_log_lnum(c, c->lhead_lnum); -+ c->lhead_offs = 0; -+ } -+ -+ if (c->lhead_offs == 0) { -+ /* Must ensure next LEB has been unmapped */ -+ err = ubifs_leb_unmap(c, c->lhead_lnum); -+ if (err) -+ goto out; -+ } -+ -+ len = ALIGN(len, c->min_io_size); -+ dbg_log("writing commit start at LEB %d:0, len %d", c->lhead_lnum, len); -+ err = ubifs_leb_write(c, c->lhead_lnum, cs, 0, len, UBI_SHORTTERM); -+ if (err) -+ goto out; -+ -+ *ltail_lnum = c->lhead_lnum; -+ -+ c->lhead_offs += len; -+ if (c->lhead_offs == c->leb_size) { -+ 
c->lhead_lnum = next_log_lnum(c, c->lhead_lnum); -+ c->lhead_offs = 0; -+ } -+ -+ remove_buds(c); -+ -+ /* -+ * We have started the commit and now users may use the rest of the log -+ * for new writes. -+ */ -+ c->min_log_bytes = 0; -+ -+out: -+ kfree(buf); -+ return err; -+} -+ -+/** -+ * ubifs_log_end_commit - end commit. -+ * @c: UBIFS file-system description object -+ * @ltail_lnum: new log tail LEB number -+ * -+ * This function is called on when the commit operation was finished. It -+ * moves log tail to new position and unmaps LEBs which contain obsolete data. -+ * Returns zero in case of success and a negative error code in case of -+ * failure. -+ */ -+int ubifs_log_end_commit(struct ubifs_info *c, int ltail_lnum) -+{ -+ int err; -+ -+ /* -+ * At this phase we have to lock 'c->log_mutex' because UBIFS allows FS -+ * writes during commit. Its only short "commit" start phase when -+ * writers are blocked. -+ */ -+ mutex_lock(&c->log_mutex); -+ -+ dbg_log("old tail was LEB %d:0, new tail is LEB %d:0", -+ c->ltail_lnum, ltail_lnum); -+ -+ c->ltail_lnum = ltail_lnum; -+ /* -+ * The commit is finished and from now on it must be guaranteed that -+ * there is always enough space for the next commit. -+ */ -+ c->min_log_bytes = c->leb_size; -+ -+ spin_lock(&c->buds_lock); -+ c->bud_bytes -= c->cmt_bud_bytes; -+ spin_unlock(&c->buds_lock); -+ -+ err = dbg_check_bud_bytes(c); -+ -+ mutex_unlock(&c->log_mutex); -+ return err; -+} -+ -+/** -+ * ubifs_log_post_commit - things to do after commit is completed. -+ * @c: UBIFS file-system description object -+ * @old_ltail_lnum: old log tail LEB number -+ * -+ * Release buds only after commit is completed, because they must be unchanged -+ * if recovery is needed. -+ * -+ * Unmap log LEBs only after commit is completed, because they may be needed for -+ * recovery. -+ * -+ * This function returns %0 on success and a negative error code on failure. 
-+ */ -+int ubifs_log_post_commit(struct ubifs_info *c, int old_ltail_lnum) -+{ -+ int lnum, err = 0; -+ -+ while (!list_empty(&c->old_buds)) { -+ struct ubifs_bud *bud; -+ -+ bud = list_entry(c->old_buds.next, struct ubifs_bud, list); -+ err = ubifs_return_leb(c, bud->lnum); -+ if (err) -+ return err; -+ list_del(&bud->list); -+ kfree(bud); -+ } -+ mutex_lock(&c->log_mutex); -+ for (lnum = old_ltail_lnum; lnum != c->ltail_lnum; -+ lnum = next_log_lnum(c, lnum)) { -+ dbg_log("unmap log LEB %d", lnum); -+ err = ubifs_leb_unmap(c, lnum); -+ if (err) -+ goto out; -+ } -+out: -+ mutex_unlock(&c->log_mutex); -+ return err; -+} -+ -+/** -+ * struct done_ref - references that have been done. -+ * @rb: rb-tree node -+ * @lnum: LEB number -+ */ -+struct done_ref { -+ struct rb_node rb; -+ int lnum; -+}; -+ -+/** -+ * done_already - determine if a reference has been done already. -+ * @done_tree: rb-tree to store references that have been done -+ * @lnum: LEB number of reference -+ * -+ * This function returns %1 if the reference has been done, %0 if not, otherwise -+ * a negative error code is returned. -+ */ -+static int done_already(struct rb_root *done_tree, int lnum) -+{ -+ struct rb_node **p = &done_tree->rb_node, *parent = NULL; -+ struct done_ref *dr; -+ -+ while (*p) { -+ parent = *p; -+ dr = rb_entry(parent, struct done_ref, rb); -+ if (lnum < dr->lnum) -+ p = &(*p)->rb_left; -+ else if (lnum > dr->lnum) -+ p = &(*p)->rb_right; -+ else -+ return 1; -+ } -+ -+ dr = kzalloc(sizeof(struct done_ref), GFP_NOFS); -+ if (!dr) -+ return -ENOMEM; -+ -+ dr->lnum = lnum; -+ -+ rb_link_node(&dr->rb, parent, p); -+ rb_insert_color(&dr->rb, done_tree); -+ -+ return 0; -+} -+ -+/** -+ * destroy_done_tree - destroy the done tree. 
-+ * @done_tree: done tree to destroy -+ */ -+static void destroy_done_tree(struct rb_root *done_tree) -+{ -+ struct rb_node *this = done_tree->rb_node; -+ struct done_ref *dr; -+ -+ while (this) { -+ if (this->rb_left) { -+ this = this->rb_left; -+ continue; -+ } else if (this->rb_right) { -+ this = this->rb_right; -+ continue; -+ } -+ dr = rb_entry(this, struct done_ref, rb); -+ this = rb_parent(this); -+ if (this) { -+ if (this->rb_left == &dr->rb) -+ this->rb_left = NULL; -+ else -+ this->rb_right = NULL; -+ } -+ kfree(dr); -+ } -+} -+ -+/** -+ * add_node - add a node to the consolidated log. -+ * @c: UBIFS file-system description object -+ * @buf: buffer to which to add -+ * @lnum: LEB number to which to write is passed and returned here -+ * @offs: offset to where to write is passed and returned here -+ * @node: node to add -+ * -+ * This function returns %0 on success and a negative error code on failure. -+ */ -+static int add_node(struct ubifs_info *c, void *buf, int *lnum, int *offs, -+ void *node) -+{ -+ struct ubifs_ch *ch = node; -+ int len = le32_to_cpu(ch->len), remains = c->leb_size - *offs; -+ -+ if (len > remains) { -+ int sz = ALIGN(*offs, c->min_io_size), err; -+ -+ ubifs_pad(c, buf + *offs, sz - *offs); -+ err = ubifs_leb_change(c, *lnum, buf, sz, UBI_SHORTTERM); -+ if (err) -+ return err; -+ *lnum = next_log_lnum(c, *lnum); -+ *offs = 0; -+ } -+ memcpy(buf + *offs, node, len); -+ *offs += ALIGN(len, 8); -+ return 0; -+} -+ -+/** -+ * ubifs_consolidate_log - consolidate the log. -+ * @c: UBIFS file-system description object -+ * -+ * Repeated failed commits could cause the log to be full, but at least 1 LEB is -+ * needed for commit. This function rewrites the reference nodes in the log -+ * omitting duplicates, and failed CS nodes, and leaving no gaps. -+ * -+ * This function returns %0 on success and a negative error code on failure. 
-+ */ -+int ubifs_consolidate_log(struct ubifs_info *c) -+{ -+ struct ubifs_scan_leb *sleb; -+ struct ubifs_scan_node *snod; -+ struct rb_root done_tree = RB_ROOT; -+ int lnum, err, first = 1, write_lnum, offs = 0; -+ void *buf; -+ -+ dbg_rcvry("log tail LEB %d, log head LEB %d", c->ltail_lnum, -+ c->lhead_lnum); -+ buf = vmalloc(c->leb_size); -+ if (!buf) -+ return -ENOMEM; -+ lnum = c->ltail_lnum; -+ write_lnum = lnum; -+ while (1) { -+ sleb = ubifs_scan(c, lnum, 0, c->sbuf); -+ if (IS_ERR(sleb)) { -+ err = PTR_ERR(sleb); -+ goto out_free; -+ } -+ list_for_each_entry(snod, &sleb->nodes, list) { -+ switch (snod->type) { -+ case UBIFS_REF_NODE: { -+ struct ubifs_ref_node *ref = snod->node; -+ int ref_lnum = le32_to_cpu(ref->lnum); -+ -+ err = done_already(&done_tree, ref_lnum); -+ if (err < 0) -+ goto out_scan; -+ if (err != 1) { -+ err = add_node(c, buf, &write_lnum, -+ &offs, snod->node); -+ if (err) -+ goto out_scan; -+ } -+ break; -+ } -+ case UBIFS_CS_NODE: -+ if (!first) -+ break; -+ err = add_node(c, buf, &write_lnum, &offs, -+ snod->node); -+ if (err) -+ goto out_scan; -+ first = 0; -+ break; -+ } -+ } -+ ubifs_scan_destroy(sleb); -+ if (lnum == c->lhead_lnum) -+ break; -+ lnum = next_log_lnum(c, lnum); -+ } -+ if (offs) { -+ int sz = ALIGN(offs, c->min_io_size); -+ -+ ubifs_pad(c, buf + offs, sz - offs); -+ err = ubifs_leb_change(c, write_lnum, buf, sz, UBI_SHORTTERM); -+ if (err) -+ goto out_free; -+ offs = ALIGN(offs, c->min_io_size); -+ } -+ destroy_done_tree(&done_tree); -+ vfree(buf); -+ if (write_lnum == c->lhead_lnum) { -+ ubifs_err("log is too full"); -+ return -EINVAL; -+ } -+ /* Unmap remaining LEBs */ -+ lnum = write_lnum; -+ do { -+ lnum = next_log_lnum(c, lnum); -+ err = ubifs_leb_unmap(c, lnum); -+ if (err) -+ return err; -+ } while (lnum != c->lhead_lnum); -+ c->lhead_lnum = write_lnum; -+ c->lhead_offs = offs; -+ dbg_rcvry("new log head at %d:%d", c->lhead_lnum, c->lhead_offs); -+ return 0; -+ -+out_scan: -+ ubifs_scan_destroy(sleb); 
-+out_free: -+ destroy_done_tree(&done_tree); -+ vfree(buf); -+ return err; -+} -+ -+#ifdef CONFIG_UBIFS_FS_DEBUG -+ -+/** -+ * dbg_check_bud_bytes - make sure bud bytes calculation are all right. -+ * @c: UBIFS file-system description object -+ * -+ * This function makes sure the amount of flash space used by closed buds -+ * ('c->bud_bytes' is correct). Returns zero in case of success and %-EINVAL in -+ * case of failure. -+ */ -+static int dbg_check_bud_bytes(struct ubifs_info *c) -+{ -+ int i, err = 0; -+ struct ubifs_bud *bud; -+ long long bud_bytes = 0; -+ -+ if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) -+ return 0; -+ -+ spin_lock(&c->buds_lock); -+ for (i = 0; i < c->jhead_cnt; i++) -+ list_for_each_entry(bud, &c->jheads[i].buds_list, list) -+ bud_bytes += c->leb_size - bud->start; -+ -+ if (c->bud_bytes != bud_bytes) { -+ ubifs_err("bad bud_bytes %lld, calculated %lld", -+ c->bud_bytes, bud_bytes); -+ err = -EINVAL; -+ } -+ spin_unlock(&c->buds_lock); -+ -+ return err; -+} -+ -+#endif /* CONFIG_UBIFS_FS_DEBUG */ -diff -Nurd linux-2.6.24.orig/fs/ubifs/lprops.c linux-2.6.24/fs/ubifs/lprops.c ---- linux-2.6.24.orig/fs/ubifs/lprops.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.24/fs/ubifs/lprops.c 2009-04-17 09:49:28.000000000 +0200 -@@ -0,0 +1,1333 @@ -+/* -+ * This file is part of UBIFS. -+ * -+ * Copyright (C) 2006-2008 Nokia Corporation. -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 as published by -+ * the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -+ * more details. 
-+ * -+ * You should have received a copy of the GNU General Public License along with -+ * this program; if not, write to the Free Software Foundation, Inc., 51 -+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ * -+ * Authors: Adrian Hunter -+ * Artem Bityutskiy (Битюцкий Артём) -+ */ -+ -+/* -+ * This file implements the functions that access LEB properties and their -+ * categories. LEBs are categorized based on the needs of UBIFS, and the -+ * categories are stored as either heaps or lists to provide a fast way of -+ * finding a LEB in a particular category. For example, UBIFS may need to find -+ * an empty LEB for the journal, or a very dirty LEB for garbage collection. -+ */ -+ -+#include "ubifs.h" -+ -+/** -+ * get_heap_comp_val - get the LEB properties value for heap comparisons. -+ * @lprops: LEB properties -+ * @cat: LEB category -+ */ -+static int get_heap_comp_val(struct ubifs_lprops *lprops, int cat) -+{ -+ switch (cat) { -+ case LPROPS_FREE: -+ return lprops->free; -+ case LPROPS_DIRTY_IDX: -+ return lprops->free + lprops->dirty; -+ default: -+ return lprops->dirty; -+ } -+} -+ -+/** -+ * move_up_lpt_heap - move a new heap entry up as far as possible. -+ * @c: UBIFS file-system description object -+ * @heap: LEB category heap -+ * @lprops: LEB properties to move -+ * @cat: LEB category -+ * -+ * New entries to a heap are added at the bottom and then moved up until the -+ * parent's value is greater. In the case of LPT's category heaps, the value -+ * is either the amount of free space or the amount of dirty space, depending -+ * on the category. 
-+ */ -+static void move_up_lpt_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, -+ struct ubifs_lprops *lprops, int cat) -+{ -+ int val1, val2, hpos; -+ -+ hpos = lprops->hpos; -+ if (!hpos) -+ return; /* Already top of the heap */ -+ val1 = get_heap_comp_val(lprops, cat); -+ /* Compare to parent and, if greater, move up the heap */ -+ do { -+ int ppos = (hpos - 1) / 2; -+ -+ val2 = get_heap_comp_val(heap->arr[ppos], cat); -+ if (val2 >= val1) -+ return; -+ /* Greater than parent so move up */ -+ heap->arr[ppos]->hpos = hpos; -+ heap->arr[hpos] = heap->arr[ppos]; -+ heap->arr[ppos] = lprops; -+ lprops->hpos = ppos; -+ hpos = ppos; -+ } while (hpos); -+} -+ -+/** -+ * adjust_lpt_heap - move a changed heap entry up or down the heap. -+ * @c: UBIFS file-system description object -+ * @heap: LEB category heap -+ * @lprops: LEB properties to move -+ * @hpos: heap position of @lprops -+ * @cat: LEB category -+ * -+ * Changed entries in a heap are moved up or down until the parent's value is -+ * greater. In the case of LPT's category heaps, the value is either the amount -+ * of free space or the amount of dirty space, depending on the category. 
-+ */ -+static void adjust_lpt_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, -+ struct ubifs_lprops *lprops, int hpos, int cat) -+{ -+ int val1, val2, val3, cpos; -+ -+ val1 = get_heap_comp_val(lprops, cat); -+ /* Compare to parent and, if greater than parent, move up the heap */ -+ if (hpos) { -+ int ppos = (hpos - 1) / 2; -+ -+ val2 = get_heap_comp_val(heap->arr[ppos], cat); -+ if (val1 > val2) { -+ /* Greater than parent so move up */ -+ while (1) { -+ heap->arr[ppos]->hpos = hpos; -+ heap->arr[hpos] = heap->arr[ppos]; -+ heap->arr[ppos] = lprops; -+ lprops->hpos = ppos; -+ hpos = ppos; -+ if (!hpos) -+ return; -+ ppos = (hpos - 1) / 2; -+ val2 = get_heap_comp_val(heap->arr[ppos], cat); -+ if (val1 <= val2) -+ return; -+ /* Still greater than parent so keep going */ -+ } -+ } -+ } -+ -+ /* Not greater than parent, so compare to children */ -+ while (1) { -+ /* Compare to left child */ -+ cpos = hpos * 2 + 1; -+ if (cpos >= heap->cnt) -+ return; -+ val2 = get_heap_comp_val(heap->arr[cpos], cat); -+ if (val1 < val2) { -+ /* Less than left child, so promote biggest child */ -+ if (cpos + 1 < heap->cnt) { -+ val3 = get_heap_comp_val(heap->arr[cpos + 1], -+ cat); -+ if (val3 > val2) -+ cpos += 1; /* Right child is bigger */ -+ } -+ heap->arr[cpos]->hpos = hpos; -+ heap->arr[hpos] = heap->arr[cpos]; -+ heap->arr[cpos] = lprops; -+ lprops->hpos = cpos; -+ hpos = cpos; -+ continue; -+ } -+ /* Compare to right child */ -+ cpos += 1; -+ if (cpos >= heap->cnt) -+ return; -+ val3 = get_heap_comp_val(heap->arr[cpos], cat); -+ if (val1 < val3) { -+ /* Less than right child, so promote right child */ -+ heap->arr[cpos]->hpos = hpos; -+ heap->arr[hpos] = heap->arr[cpos]; -+ heap->arr[cpos] = lprops; -+ lprops->hpos = cpos; -+ hpos = cpos; -+ continue; -+ } -+ return; -+ } -+} -+ -+/** -+ * add_to_lpt_heap - add LEB properties to a LEB category heap. 
-+ * @c: UBIFS file-system description object -+ * @lprops: LEB properties to add -+ * @cat: LEB category -+ * -+ * This function returns %1 if @lprops is added to the heap for LEB category -+ * @cat, otherwise %0 is returned because the heap is full. -+ */ -+static int add_to_lpt_heap(struct ubifs_info *c, struct ubifs_lprops *lprops, -+ int cat) -+{ -+ struct ubifs_lpt_heap *heap = &c->lpt_heap[cat - 1]; -+ -+ if (heap->cnt >= heap->max_cnt) { -+ const int b = LPT_HEAP_SZ / 2 - 1; -+ int cpos, val1, val2; -+ -+ /* Compare to some other LEB on the bottom of heap */ -+ /* Pick a position kind of randomly */ -+ cpos = (((size_t)lprops >> 4) & b) + b; -+ ubifs_assert(cpos >= b); -+ ubifs_assert(cpos < LPT_HEAP_SZ); -+ ubifs_assert(cpos < heap->cnt); -+ -+ val1 = get_heap_comp_val(lprops, cat); -+ val2 = get_heap_comp_val(heap->arr[cpos], cat); -+ if (val1 > val2) { -+ struct ubifs_lprops *lp; -+ -+ lp = heap->arr[cpos]; -+ lp->flags &= ~LPROPS_CAT_MASK; -+ lp->flags |= LPROPS_UNCAT; -+ list_add(&lp->list, &c->uncat_list); -+ lprops->hpos = cpos; -+ heap->arr[cpos] = lprops; -+ move_up_lpt_heap(c, heap, lprops, cat); -+ dbg_check_heap(c, heap, cat, lprops->hpos); -+ return 1; /* Added to heap */ -+ } -+ dbg_check_heap(c, heap, cat, -1); -+ return 0; /* Not added to heap */ -+ } else { -+ lprops->hpos = heap->cnt++; -+ heap->arr[lprops->hpos] = lprops; -+ move_up_lpt_heap(c, heap, lprops, cat); -+ dbg_check_heap(c, heap, cat, lprops->hpos); -+ return 1; /* Added to heap */ -+ } -+} -+ -+/** -+ * remove_from_lpt_heap - remove LEB properties from a LEB category heap. 
-+ * @c: UBIFS file-system description object -+ * @lprops: LEB properties to remove -+ * @cat: LEB category -+ */ -+static void remove_from_lpt_heap(struct ubifs_info *c, -+ struct ubifs_lprops *lprops, int cat) -+{ -+ struct ubifs_lpt_heap *heap; -+ int hpos = lprops->hpos; -+ -+ heap = &c->lpt_heap[cat - 1]; -+ ubifs_assert(hpos >= 0 && hpos < heap->cnt); -+ ubifs_assert(heap->arr[hpos] == lprops); -+ heap->cnt -= 1; -+ if (hpos < heap->cnt) { -+ heap->arr[hpos] = heap->arr[heap->cnt]; -+ heap->arr[hpos]->hpos = hpos; -+ adjust_lpt_heap(c, heap, heap->arr[hpos], hpos, cat); -+ } -+ dbg_check_heap(c, heap, cat, -1); -+} -+ -+/** -+ * lpt_heap_replace - replace lprops in a category heap. -+ * @c: UBIFS file-system description object -+ * @old_lprops: LEB properties to replace -+ * @new_lprops: LEB properties with which to replace -+ * @cat: LEB category -+ * -+ * During commit it is sometimes necessary to copy a pnode (see dirty_cow_pnode) -+ * and the lprops that the pnode contains. When that happens, references in -+ * the category heaps to those lprops must be updated to point to the new -+ * lprops. This function does that. -+ */ -+static void lpt_heap_replace(struct ubifs_info *c, -+ struct ubifs_lprops *old_lprops, -+ struct ubifs_lprops *new_lprops, int cat) -+{ -+ struct ubifs_lpt_heap *heap; -+ int hpos = new_lprops->hpos; -+ -+ heap = &c->lpt_heap[cat - 1]; -+ heap->arr[hpos] = new_lprops; -+} -+ -+/** -+ * ubifs_add_to_cat - add LEB properties to a category list or heap. -+ * @c: UBIFS file-system description object -+ * @lprops: LEB properties to add -+ * @cat: LEB category to which to add -+ * -+ * LEB properties are categorized to enable fast find operations. 
-+ */ -+void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops, -+ int cat) -+{ -+ switch (cat) { -+ case LPROPS_DIRTY: -+ case LPROPS_DIRTY_IDX: -+ case LPROPS_FREE: -+ if (add_to_lpt_heap(c, lprops, cat)) -+ break; -+ /* No more room on heap so make it uncategorized */ -+ cat = LPROPS_UNCAT; -+ /* Fall through */ -+ case LPROPS_UNCAT: -+ list_add(&lprops->list, &c->uncat_list); -+ break; -+ case LPROPS_EMPTY: -+ list_add(&lprops->list, &c->empty_list); -+ break; -+ case LPROPS_FREEABLE: -+ list_add(&lprops->list, &c->freeable_list); -+ c->freeable_cnt += 1; -+ break; -+ case LPROPS_FRDI_IDX: -+ list_add(&lprops->list, &c->frdi_idx_list); -+ break; -+ default: -+ ubifs_assert(0); -+ } -+ lprops->flags &= ~LPROPS_CAT_MASK; -+ lprops->flags |= cat; -+} -+ -+/** -+ * ubifs_remove_from_cat - remove LEB properties from a category list or heap. -+ * @c: UBIFS file-system description object -+ * @lprops: LEB properties to remove -+ * @cat: LEB category from which to remove -+ * -+ * LEB properties are categorized to enable fast find operations. -+ */ -+static void ubifs_remove_from_cat(struct ubifs_info *c, -+ struct ubifs_lprops *lprops, int cat) -+{ -+ switch (cat) { -+ case LPROPS_DIRTY: -+ case LPROPS_DIRTY_IDX: -+ case LPROPS_FREE: -+ remove_from_lpt_heap(c, lprops, cat); -+ break; -+ case LPROPS_FREEABLE: -+ c->freeable_cnt -= 1; -+ ubifs_assert(c->freeable_cnt >= 0); -+ /* Fall through */ -+ case LPROPS_UNCAT: -+ case LPROPS_EMPTY: -+ case LPROPS_FRDI_IDX: -+ ubifs_assert(!list_empty(&lprops->list)); -+ list_del(&lprops->list); -+ break; -+ default: -+ ubifs_assert(0); -+ } -+} -+ -+/** -+ * ubifs_replace_cat - replace lprops in a category list or heap. -+ * @c: UBIFS file-system description object -+ * @old_lprops: LEB properties to replace -+ * @new_lprops: LEB properties with which to replace -+ * -+ * During commit it is sometimes necessary to copy a pnode (see dirty_cow_pnode) -+ * and the lprops that the pnode contains. 
When that happens, references in -+ * category lists and heaps must be replaced. This function does that. -+ */ -+void ubifs_replace_cat(struct ubifs_info *c, struct ubifs_lprops *old_lprops, -+ struct ubifs_lprops *new_lprops) -+{ -+ int cat; -+ -+ cat = new_lprops->flags & LPROPS_CAT_MASK; -+ switch (cat) { -+ case LPROPS_DIRTY: -+ case LPROPS_DIRTY_IDX: -+ case LPROPS_FREE: -+ lpt_heap_replace(c, old_lprops, new_lprops, cat); -+ break; -+ case LPROPS_UNCAT: -+ case LPROPS_EMPTY: -+ case LPROPS_FREEABLE: -+ case LPROPS_FRDI_IDX: -+ list_replace(&old_lprops->list, &new_lprops->list); -+ break; -+ default: -+ ubifs_assert(0); -+ } -+} -+ -+/** -+ * ubifs_ensure_cat - ensure LEB properties are categorized. -+ * @c: UBIFS file-system description object -+ * @lprops: LEB properties -+ * -+ * A LEB may have fallen off of the bottom of a heap, and ended up as -+ * uncategorized even though it has enough space for us now. If that is the case -+ * this function will put the LEB back onto a heap. -+ */ -+void ubifs_ensure_cat(struct ubifs_info *c, struct ubifs_lprops *lprops) -+{ -+ int cat = lprops->flags & LPROPS_CAT_MASK; -+ -+ if (cat != LPROPS_UNCAT) -+ return; -+ cat = ubifs_categorize_lprops(c, lprops); -+ if (cat == LPROPS_UNCAT) -+ return; -+ ubifs_remove_from_cat(c, lprops, LPROPS_UNCAT); -+ ubifs_add_to_cat(c, lprops, cat); -+} -+ -+/** -+ * ubifs_categorize_lprops - categorize LEB properties. -+ * @c: UBIFS file-system description object -+ * @lprops: LEB properties to categorize -+ * -+ * LEB properties are categorized to enable fast find operations. This function -+ * returns the LEB category to which the LEB properties belong. Note however -+ * that if the LEB category is stored as a heap and the heap is full, the -+ * LEB properties may have their category changed to %LPROPS_UNCAT. 
-+ */ -+int ubifs_categorize_lprops(const struct ubifs_info *c, -+ const struct ubifs_lprops *lprops) -+{ -+ if (lprops->flags & LPROPS_TAKEN) -+ return LPROPS_UNCAT; -+ -+ if (lprops->free == c->leb_size) { -+ ubifs_assert(!(lprops->flags & LPROPS_INDEX)); -+ return LPROPS_EMPTY; -+ } -+ -+ if (lprops->free + lprops->dirty == c->leb_size) { -+ if (lprops->flags & LPROPS_INDEX) -+ return LPROPS_FRDI_IDX; -+ else -+ return LPROPS_FREEABLE; -+ } -+ -+ if (lprops->flags & LPROPS_INDEX) { -+ if (lprops->dirty + lprops->free >= c->min_idx_node_sz) -+ return LPROPS_DIRTY_IDX; -+ } else { -+ if (lprops->dirty >= c->dead_wm && -+ lprops->dirty > lprops->free) -+ return LPROPS_DIRTY; -+ if (lprops->free > 0) -+ return LPROPS_FREE; -+ } -+ -+ return LPROPS_UNCAT; -+} -+ -+/** -+ * change_category - change LEB properties category. -+ * @c: UBIFS file-system description object -+ * @lprops: LEB properties to recategorize -+ * -+ * LEB properties are categorized to enable fast find operations. When the LEB -+ * properties change they must be recategorized. -+ */ -+static void change_category(struct ubifs_info *c, struct ubifs_lprops *lprops) -+{ -+ int old_cat = lprops->flags & LPROPS_CAT_MASK; -+ int new_cat = ubifs_categorize_lprops(c, lprops); -+ -+ if (old_cat == new_cat) { -+ struct ubifs_lpt_heap *heap = &c->lpt_heap[new_cat - 1]; -+ -+ /* lprops on a heap now must be moved up or down */ -+ if (new_cat < 1 || new_cat > LPROPS_HEAP_CNT) -+ return; /* Not on a heap */ -+ heap = &c->lpt_heap[new_cat - 1]; -+ adjust_lpt_heap(c, heap, lprops, lprops->hpos, new_cat); -+ } else { -+ ubifs_remove_from_cat(c, lprops, old_cat); -+ ubifs_add_to_cat(c, lprops, new_cat); -+ } -+} -+ -+/** -+ * calc_dark - calculate LEB dark space size. -+ * @c: the UBIFS file-system description object -+ * @spc: amount of free and dirty space in the LEB -+ * -+ * This function calculates amount of dark space in an LEB which has @spc bytes -+ * of free and dirty space. Returns the calculations result. 
-+ * -+ * Dark space is the space which is not always usable - it depends on which -+ * nodes are written in which order. E.g., if an LEB has only 512 free bytes, -+ * it is dark space, because it cannot fit a large data node. So UBIFS cannot -+ * count on this LEB and treat these 512 bytes as usable because it is not true -+ * if, for example, only big chunks of uncompressible data will be written to -+ * the FS. -+ */ -+static int calc_dark(struct ubifs_info *c, int spc) -+{ -+ ubifs_assert(!(spc & 7)); -+ -+ if (spc < c->dark_wm) -+ return spc; -+ -+ /* -+ * If we have slightly more space then the dark space watermark, we can -+ * anyway safely assume it we'll be able to write a node of the -+ * smallest size there. -+ */ -+ if (spc - c->dark_wm < MIN_WRITE_SZ) -+ return spc - MIN_WRITE_SZ; -+ -+ return c->dark_wm; -+} -+ -+/** -+ * is_lprops_dirty - determine if LEB properties are dirty. -+ * @c: the UBIFS file-system description object -+ * @lprops: LEB properties to test -+ */ -+static int is_lprops_dirty(struct ubifs_info *c, struct ubifs_lprops *lprops) -+{ -+ struct ubifs_pnode *pnode; -+ int pos; -+ -+ pos = (lprops->lnum - c->main_first) & (UBIFS_LPT_FANOUT - 1); -+ pnode = (struct ubifs_pnode *)container_of(lprops - pos, -+ struct ubifs_pnode, -+ lprops[0]); -+ return !test_bit(COW_ZNODE, &pnode->flags) && -+ test_bit(DIRTY_CNODE, &pnode->flags); -+} -+ -+/** -+ * ubifs_change_lp - change LEB properties. -+ * @c: the UBIFS file-system description object -+ * @lp: LEB properties to change -+ * @free: new free space amount -+ * @dirty: new dirty space amount -+ * @flags: new flags -+ * @idx_gc_cnt: change to the count of idx_gc list -+ * -+ * This function changes LEB properties (@free, @dirty or @flag). However, the -+ * property which has the %LPROPS_NC value is not changed. Returns a pointer to -+ * the updated LEB properties on success and a negative error code on failure. 
-+ * -+ * Note, the LEB properties may have had to be copied (due to COW) and -+ * consequently the pointer returned may not be the same as the pointer -+ * passed. -+ */ -+const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c, -+ const struct ubifs_lprops *lp, -+ int free, int dirty, int flags, -+ int idx_gc_cnt) -+{ -+ /* -+ * This is the only function that is allowed to change lprops, so we -+ * discard the const qualifier. -+ */ -+ struct ubifs_lprops *lprops = (struct ubifs_lprops *)lp; -+ -+ dbg_lp("LEB %d, free %d, dirty %d, flags %d", -+ lprops->lnum, free, dirty, flags); -+ -+ ubifs_assert(mutex_is_locked(&c->lp_mutex)); -+ ubifs_assert(c->lst.empty_lebs >= 0 && -+ c->lst.empty_lebs <= c->main_lebs); -+ ubifs_assert(c->freeable_cnt >= 0); -+ ubifs_assert(c->freeable_cnt <= c->main_lebs); -+ ubifs_assert(c->lst.taken_empty_lebs >= 0); -+ ubifs_assert(c->lst.taken_empty_lebs <= c->lst.empty_lebs); -+ ubifs_assert(!(c->lst.total_free & 7) && !(c->lst.total_dirty & 7)); -+ ubifs_assert(!(c->lst.total_dead & 7) && !(c->lst.total_dark & 7)); -+ ubifs_assert(!(c->lst.total_used & 7)); -+ ubifs_assert(free == LPROPS_NC || free >= 0); -+ ubifs_assert(dirty == LPROPS_NC || dirty >= 0); -+ -+ if (!is_lprops_dirty(c, lprops)) { -+ lprops = ubifs_lpt_lookup_dirty(c, lprops->lnum); -+ if (IS_ERR(lprops)) -+ return lprops; -+ } else -+ ubifs_assert(lprops == ubifs_lpt_lookup_dirty(c, lprops->lnum)); -+ -+ ubifs_assert(!(lprops->free & 7) && !(lprops->dirty & 7)); -+ -+ spin_lock(&c->space_lock); -+ if ((lprops->flags & LPROPS_TAKEN) && lprops->free == c->leb_size) -+ c->lst.taken_empty_lebs -= 1; -+ -+ if (!(lprops->flags & LPROPS_INDEX)) { -+ int old_spc; -+ -+ old_spc = lprops->free + lprops->dirty; -+ if (old_spc < c->dead_wm) -+ c->lst.total_dead -= old_spc; -+ else -+ c->lst.total_dark -= calc_dark(c, old_spc); -+ -+ c->lst.total_used -= c->leb_size - old_spc; -+ } -+ -+ if (free != LPROPS_NC) { -+ free = ALIGN(free, 8); -+ c->lst.total_free += free - 
lprops->free; -+ -+ /* Increase or decrease empty LEBs counter if needed */ -+ if (free == c->leb_size) { -+ if (lprops->free != c->leb_size) -+ c->lst.empty_lebs += 1; -+ } else if (lprops->free == c->leb_size) -+ c->lst.empty_lebs -= 1; -+ lprops->free = free; -+ } -+ -+ if (dirty != LPROPS_NC) { -+ dirty = ALIGN(dirty, 8); -+ c->lst.total_dirty += dirty - lprops->dirty; -+ lprops->dirty = dirty; -+ } -+ -+ if (flags != LPROPS_NC) { -+ /* Take care about indexing LEBs counter if needed */ -+ if ((lprops->flags & LPROPS_INDEX)) { -+ if (!(flags & LPROPS_INDEX)) -+ c->lst.idx_lebs -= 1; -+ } else if (flags & LPROPS_INDEX) -+ c->lst.idx_lebs += 1; -+ lprops->flags = flags; -+ } -+ -+ if (!(lprops->flags & LPROPS_INDEX)) { -+ int new_spc; -+ -+ new_spc = lprops->free + lprops->dirty; -+ if (new_spc < c->dead_wm) -+ c->lst.total_dead += new_spc; -+ else -+ c->lst.total_dark += calc_dark(c, new_spc); -+ -+ c->lst.total_used += c->leb_size - new_spc; -+ } -+ -+ if ((lprops->flags & LPROPS_TAKEN) && lprops->free == c->leb_size) -+ c->lst.taken_empty_lebs += 1; -+ -+ change_category(c, lprops); -+ c->idx_gc_cnt += idx_gc_cnt; -+ spin_unlock(&c->space_lock); -+ return lprops; -+} -+ -+/** -+ * ubifs_get_lp_stats - get lprops statistics. -+ * @c: UBIFS file-system description object -+ * @st: return statistics -+ */ -+void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *lst) -+{ -+ spin_lock(&c->space_lock); -+ memcpy(lst, &c->lst, sizeof(struct ubifs_lp_stats)); -+ spin_unlock(&c->space_lock); -+} -+ -+/** -+ * ubifs_change_one_lp - change LEB properties. -+ * @c: the UBIFS file-system description object -+ * @lnum: LEB to change properties for -+ * @free: amount of free space -+ * @dirty: amount of dirty space -+ * @flags_set: flags to set -+ * @flags_clean: flags to clean -+ * @idx_gc_cnt: change to the count of idx_gc list -+ * -+ * This function changes properties of LEB @lnum. 
It is a helper wrapper over -+ * 'ubifs_change_lp()' which hides lprops get/release. The arguments are the -+ * same as in case of 'ubifs_change_lp()'. Returns zero in case of success and -+ * a negative error code in case of failure. -+ */ -+int ubifs_change_one_lp(struct ubifs_info *c, int lnum, int free, int dirty, -+ int flags_set, int flags_clean, int idx_gc_cnt) -+{ -+ int err = 0, flags; -+ const struct ubifs_lprops *lp; -+ -+ ubifs_get_lprops(c); -+ -+ lp = ubifs_lpt_lookup_dirty(c, lnum); -+ if (IS_ERR(lp)) { -+ err = PTR_ERR(lp); -+ goto out; -+ } -+ -+ flags = (lp->flags | flags_set) & ~flags_clean; -+ lp = ubifs_change_lp(c, lp, free, dirty, flags, idx_gc_cnt); -+ if (IS_ERR(lp)) -+ err = PTR_ERR(lp); -+ -+out: -+ ubifs_release_lprops(c); -+ if (err) -+ ubifs_err("cannot change properties of LEB %d, error %d", -+ lnum, err); -+ return err; -+} -+ -+/** -+ * ubifs_update_one_lp - update LEB properties. -+ * @c: the UBIFS file-system description object -+ * @lnum: LEB to change properties for -+ * @free: amount of free space -+ * @dirty: amount of dirty space to add -+ * @flags_set: flags to set -+ * @flags_clean: flags to clean -+ * -+ * This function is the same as 'ubifs_change_one_lp()' but @dirty is added to -+ * current dirty space, not substitutes it. -+ */ -+int ubifs_update_one_lp(struct ubifs_info *c, int lnum, int free, int dirty, -+ int flags_set, int flags_clean) -+{ -+ int err = 0, flags; -+ const struct ubifs_lprops *lp; -+ -+ ubifs_get_lprops(c); -+ -+ lp = ubifs_lpt_lookup_dirty(c, lnum); -+ if (IS_ERR(lp)) { -+ err = PTR_ERR(lp); -+ goto out; -+ } -+ -+ flags = (lp->flags | flags_set) & ~flags_clean; -+ lp = ubifs_change_lp(c, lp, free, lp->dirty + dirty, flags, 0); -+ if (IS_ERR(lp)) -+ err = PTR_ERR(lp); -+ -+out: -+ ubifs_release_lprops(c); -+ if (err) -+ ubifs_err("cannot update properties of LEB %d, error %d", -+ lnum, err); -+ return err; -+} -+ -+/** -+ * ubifs_read_one_lp - read LEB properties. 
-+ * @c: the UBIFS file-system description object -+ * @lnum: LEB to read properties for -+ * @lp: where to store read properties -+ * -+ * This helper function reads properties of a LEB @lnum and stores them in @lp. -+ * Returns zero in case of success and a negative error code in case of -+ * failure. -+ */ -+int ubifs_read_one_lp(struct ubifs_info *c, int lnum, struct ubifs_lprops *lp) -+{ -+ int err = 0; -+ const struct ubifs_lprops *lpp; -+ -+ ubifs_get_lprops(c); -+ -+ lpp = ubifs_lpt_lookup(c, lnum); -+ if (IS_ERR(lpp)) { -+ err = PTR_ERR(lpp); -+ ubifs_err("cannot read properties of LEB %d, error %d", -+ lnum, err); -+ goto out; -+ } -+ -+ memcpy(lp, lpp, sizeof(struct ubifs_lprops)); -+ -+out: -+ ubifs_release_lprops(c); -+ return err; -+} -+ -+/** -+ * ubifs_fast_find_free - try to find a LEB with free space quickly. -+ * @c: the UBIFS file-system description object -+ * -+ * This function returns LEB properties for a LEB with free space or %NULL if -+ * the function is unable to find a LEB quickly. -+ */ -+const struct ubifs_lprops *ubifs_fast_find_free(struct ubifs_info *c) -+{ -+ struct ubifs_lprops *lprops; -+ struct ubifs_lpt_heap *heap; -+ -+ ubifs_assert(mutex_is_locked(&c->lp_mutex)); -+ -+ heap = &c->lpt_heap[LPROPS_FREE - 1]; -+ if (heap->cnt == 0) -+ return NULL; -+ -+ lprops = heap->arr[0]; -+ ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); -+ ubifs_assert(!(lprops->flags & LPROPS_INDEX)); -+ return lprops; -+} -+ -+/** -+ * ubifs_fast_find_empty - try to find an empty LEB quickly. -+ * @c: the UBIFS file-system description object -+ * -+ * This function returns LEB properties for an empty LEB or %NULL if the -+ * function is unable to find an empty LEB quickly. 
-+ */ -+const struct ubifs_lprops *ubifs_fast_find_empty(struct ubifs_info *c) -+{ -+ struct ubifs_lprops *lprops; -+ -+ ubifs_assert(mutex_is_locked(&c->lp_mutex)); -+ -+ if (list_empty(&c->empty_list)) -+ return NULL; -+ -+ lprops = list_entry(c->empty_list.next, struct ubifs_lprops, list); -+ ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); -+ ubifs_assert(!(lprops->flags & LPROPS_INDEX)); -+ ubifs_assert(lprops->free == c->leb_size); -+ return lprops; -+} -+ -+/** -+ * ubifs_fast_find_freeable - try to find a freeable LEB quickly. -+ * @c: the UBIFS file-system description object -+ * -+ * This function returns LEB properties for a freeable LEB or %NULL if the -+ * function is unable to find a freeable LEB quickly. -+ */ -+const struct ubifs_lprops *ubifs_fast_find_freeable(struct ubifs_info *c) -+{ -+ struct ubifs_lprops *lprops; -+ -+ ubifs_assert(mutex_is_locked(&c->lp_mutex)); -+ -+ if (list_empty(&c->freeable_list)) -+ return NULL; -+ -+ lprops = list_entry(c->freeable_list.next, struct ubifs_lprops, list); -+ ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); -+ ubifs_assert(!(lprops->flags & LPROPS_INDEX)); -+ ubifs_assert(lprops->free + lprops->dirty == c->leb_size); -+ ubifs_assert(c->freeable_cnt > 0); -+ return lprops; -+} -+ -+/** -+ * ubifs_fast_find_frdi_idx - try to find a freeable index LEB quickly. -+ * @c: the UBIFS file-system description object -+ * -+ * This function returns LEB properties for a freeable index LEB or %NULL if the -+ * function is unable to find a freeable index LEB quickly. 
-+ */ -+const struct ubifs_lprops *ubifs_fast_find_frdi_idx(struct ubifs_info *c) -+{ -+ struct ubifs_lprops *lprops; -+ -+ ubifs_assert(mutex_is_locked(&c->lp_mutex)); -+ -+ if (list_empty(&c->frdi_idx_list)) -+ return NULL; -+ -+ lprops = list_entry(c->frdi_idx_list.next, struct ubifs_lprops, list); -+ ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); -+ ubifs_assert((lprops->flags & LPROPS_INDEX)); -+ ubifs_assert(lprops->free + lprops->dirty == c->leb_size); -+ return lprops; -+} -+ -+#ifdef CONFIG_UBIFS_FS_DEBUG -+ -+/** -+ * dbg_check_cats - check category heaps and lists. -+ * @c: UBIFS file-system description object -+ * -+ * This function returns %0 on success and a negative error code on failure. -+ */ -+int dbg_check_cats(struct ubifs_info *c) -+{ -+ struct ubifs_lprops *lprops; -+ struct list_head *pos; -+ int i, cat; -+ -+ if (!(ubifs_chk_flags & (UBIFS_CHK_GEN | UBIFS_CHK_LPROPS))) -+ return 0; -+ -+ list_for_each_entry(lprops, &c->empty_list, list) { -+ if (lprops->free != c->leb_size) { -+ ubifs_err("non-empty LEB %d on empty list " -+ "(free %d dirty %d flags %d)", lprops->lnum, -+ lprops->free, lprops->dirty, lprops->flags); -+ return -EINVAL; -+ } -+ if (lprops->flags & LPROPS_TAKEN) { -+ ubifs_err("taken LEB %d on empty list " -+ "(free %d dirty %d flags %d)", lprops->lnum, -+ lprops->free, lprops->dirty, lprops->flags); -+ return -EINVAL; -+ } -+ } -+ -+ i = 0; -+ list_for_each_entry(lprops, &c->freeable_list, list) { -+ if (lprops->free + lprops->dirty != c->leb_size) { -+ ubifs_err("non-freeable LEB %d on freeable list " -+ "(free %d dirty %d flags %d)", lprops->lnum, -+ lprops->free, lprops->dirty, lprops->flags); -+ return -EINVAL; -+ } -+ if (lprops->flags & LPROPS_TAKEN) { -+ ubifs_err("taken LEB %d on freeable list " -+ "(free %d dirty %d flags %d)", lprops->lnum, -+ lprops->free, lprops->dirty, lprops->flags); -+ return -EINVAL; -+ } -+ i += 1; -+ } -+ if (i != c->freeable_cnt) { -+ ubifs_err("freeable list count %d expected %d", i, -+ 
c->freeable_cnt); -+ return -EINVAL; -+ } -+ -+ i = 0; -+ list_for_each(pos, &c->idx_gc) -+ i += 1; -+ if (i != c->idx_gc_cnt) { -+ ubifs_err("idx_gc list count %d expected %d", i, -+ c->idx_gc_cnt); -+ return -EINVAL; -+ } -+ -+ list_for_each_entry(lprops, &c->frdi_idx_list, list) { -+ if (lprops->free + lprops->dirty != c->leb_size) { -+ ubifs_err("non-freeable LEB %d on frdi_idx list " -+ "(free %d dirty %d flags %d)", lprops->lnum, -+ lprops->free, lprops->dirty, lprops->flags); -+ return -EINVAL; -+ } -+ if (lprops->flags & LPROPS_TAKEN) { -+ ubifs_err("taken LEB %d on frdi_idx list " -+ "(free %d dirty %d flags %d)", lprops->lnum, -+ lprops->free, lprops->dirty, lprops->flags); -+ return -EINVAL; -+ } -+ if (!(lprops->flags & LPROPS_INDEX)) { -+ ubifs_err("non-index LEB %d on frdi_idx list " -+ "(free %d dirty %d flags %d)", lprops->lnum, -+ lprops->free, lprops->dirty, lprops->flags); -+ return -EINVAL; -+ } -+ } -+ -+ for (cat = 1; cat <= LPROPS_HEAP_CNT; cat++) { -+ struct ubifs_lpt_heap *heap = &c->lpt_heap[cat - 1]; -+ -+ for (i = 0; i < heap->cnt; i++) { -+ lprops = heap->arr[i]; -+ if (!lprops) { -+ ubifs_err("null ptr in LPT heap cat %d", cat); -+ return -EINVAL; -+ } -+ if (lprops->hpos != i) { -+ ubifs_err("bad ptr in LPT heap cat %d", cat); -+ return -EINVAL; -+ } -+ if (lprops->flags & LPROPS_TAKEN) { -+ ubifs_err("taken LEB in LPT heap cat %d", cat); -+ return -EINVAL; -+ } -+ } -+ } -+ -+ return 0; -+} -+ -+void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat, -+ int add_pos) -+{ -+ int i = 0, j, err = 0; -+ -+ if (!(ubifs_chk_flags & (UBIFS_CHK_GEN | UBIFS_CHK_LPROPS))) -+ return; -+ -+ for (i = 0; i < heap->cnt; i++) { -+ struct ubifs_lprops *lprops = heap->arr[i]; -+ struct ubifs_lprops *lp; -+ -+ if (i != add_pos) -+ if ((lprops->flags & LPROPS_CAT_MASK) != cat) { -+ err = 1; -+ goto out; -+ } -+ if (lprops->hpos != i) { -+ err = 2; -+ goto out; -+ } -+ lp = ubifs_lpt_lookup(c, lprops->lnum); -+ if (IS_ERR(lp)) { -+ 
err = 3; -+ goto out; -+ } -+ if (lprops != lp) { -+ dbg_msg("lprops %zx lp %zx lprops->lnum %d lp->lnum %d", -+ (size_t)lprops, (size_t)lp, lprops->lnum, -+ lp->lnum); -+ err = 4; -+ goto out; -+ } -+ for (j = 0; j < i; j++) { -+ lp = heap->arr[j]; -+ if (lp == lprops) { -+ err = 5; -+ goto out; -+ } -+ if (lp->lnum == lprops->lnum) { -+ err = 6; -+ goto out; -+ } -+ } -+ } -+out: -+ if (err) { -+ dbg_msg("failed cat %d hpos %d err %d", cat, i, err); -+ dbg_dump_stack(); -+ dbg_dump_heap(c, heap, cat); -+ } -+} -+ -+/** -+ * struct scan_check_data - data provided to scan callback function. -+ * @lst: LEB properties statistics -+ * @err: error code -+ */ -+struct scan_check_data { -+ struct ubifs_lp_stats lst; -+ int err; -+}; -+ -+/** -+ * scan_check_cb - scan callback. -+ * @c: the UBIFS file-system description object -+ * @lp: LEB properties to scan -+ * @in_tree: whether the LEB properties are in main memory -+ * @data: information passed to and from the caller of the scan -+ * -+ * This function returns a code that indicates whether the scan should continue -+ * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree -+ * in main memory (%LPT_SCAN_ADD), or whether the scan should stop -+ * (%LPT_SCAN_STOP). 
-+ */ -+static int scan_check_cb(struct ubifs_info *c, -+ const struct ubifs_lprops *lp, int in_tree, -+ struct scan_check_data *data) -+{ -+ struct ubifs_scan_leb *sleb; -+ struct ubifs_scan_node *snod; -+ struct ubifs_lp_stats *lst = &data->lst; -+ int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty; -+ -+ cat = lp->flags & LPROPS_CAT_MASK; -+ if (cat != LPROPS_UNCAT) { -+ cat = ubifs_categorize_lprops(c, lp); -+ if (cat != (lp->flags & LPROPS_CAT_MASK)) { -+ ubifs_err("bad LEB category %d expected %d", -+ (lp->flags & LPROPS_CAT_MASK), cat); -+ goto out; -+ } -+ } -+ -+ /* Check lp is on its category list (if it has one) */ -+ if (in_tree) { -+ struct list_head *list = NULL; -+ -+ switch (cat) { -+ case LPROPS_EMPTY: -+ list = &c->empty_list; -+ break; -+ case LPROPS_FREEABLE: -+ list = &c->freeable_list; -+ break; -+ case LPROPS_FRDI_IDX: -+ list = &c->frdi_idx_list; -+ break; -+ case LPROPS_UNCAT: -+ list = &c->uncat_list; -+ break; -+ } -+ if (list) { -+ struct ubifs_lprops *lprops; -+ int found = 0; -+ -+ list_for_each_entry(lprops, list, list) { -+ if (lprops == lp) { -+ found = 1; -+ break; -+ } -+ } -+ if (!found) { -+ ubifs_err("bad LPT list (category %d)", cat); -+ goto out; -+ } -+ } -+ } -+ -+ /* Check lp is on its category heap (if it has one) */ -+ if (in_tree && cat > 0 && cat <= LPROPS_HEAP_CNT) { -+ struct ubifs_lpt_heap *heap = &c->lpt_heap[cat - 1]; -+ -+ if ((lp->hpos != -1 && heap->arr[lp->hpos]->lnum != lnum) || -+ lp != heap->arr[lp->hpos]) { -+ ubifs_err("bad LPT heap (category %d)", cat); -+ goto out; -+ } -+ } -+ -+ sleb = ubifs_scan(c, lnum, 0, c->dbg->buf); -+ if (IS_ERR(sleb)) { -+ /* -+ * After an unclean unmount, empty and freeable LEBs -+ * may contain garbage. 
-+ */ -+ if (lp->free == c->leb_size) { -+ ubifs_err("scan errors were in empty LEB " -+ "- continuing checking"); -+ lst->empty_lebs += 1; -+ lst->total_free += c->leb_size; -+ lst->total_dark += calc_dark(c, c->leb_size); -+ return LPT_SCAN_CONTINUE; -+ } -+ -+ if (lp->free + lp->dirty == c->leb_size && -+ !(lp->flags & LPROPS_INDEX)) { -+ ubifs_err("scan errors were in freeable LEB " -+ "- continuing checking"); -+ lst->total_free += lp->free; -+ lst->total_dirty += lp->dirty; -+ lst->total_dark += calc_dark(c, c->leb_size); -+ return LPT_SCAN_CONTINUE; -+ } -+ data->err = PTR_ERR(sleb); -+ return LPT_SCAN_STOP; -+ } -+ -+ is_idx = -1; -+ list_for_each_entry(snod, &sleb->nodes, list) { -+ int found, level = 0; -+ -+ cond_resched(); -+ -+ if (is_idx == -1) -+ is_idx = (snod->type == UBIFS_IDX_NODE) ? 1 : 0; -+ -+ if (is_idx && snod->type != UBIFS_IDX_NODE) { -+ ubifs_err("indexing node in data LEB %d:%d", -+ lnum, snod->offs); -+ goto out_destroy; -+ } -+ -+ if (snod->type == UBIFS_IDX_NODE) { -+ struct ubifs_idx_node *idx = snod->node; -+ -+ key_read(c, ubifs_idx_key(c, idx), &snod->key); -+ level = le16_to_cpu(idx->level); -+ } -+ -+ found = ubifs_tnc_has_node(c, &snod->key, level, lnum, -+ snod->offs, is_idx); -+ if (found) { -+ if (found < 0) -+ goto out_destroy; -+ used += ALIGN(snod->len, 8); -+ } -+ } -+ -+ free = c->leb_size - sleb->endpt; -+ dirty = sleb->endpt - used; -+ -+ if (free > c->leb_size || free < 0 || dirty > c->leb_size || -+ dirty < 0) { -+ ubifs_err("bad calculated accounting for LEB %d: " -+ "free %d, dirty %d", lnum, free, dirty); -+ goto out_destroy; -+ } -+ -+ if (lp->free + lp->dirty == c->leb_size && -+ free + dirty == c->leb_size) -+ if ((is_idx && !(lp->flags & LPROPS_INDEX)) || -+ (!is_idx && free == c->leb_size) || -+ lp->free == c->leb_size) { -+ /* -+ * Empty or freeable LEBs could contain index -+ * nodes from an uncompleted commit due to an -+ * unclean unmount. Or they could be empty for -+ * the same reason. 
Or it may simply not have been -+ * unmapped. -+ */ -+ free = lp->free; -+ dirty = lp->dirty; -+ is_idx = 0; -+ } -+ -+ if (is_idx && lp->free + lp->dirty == free + dirty && -+ lnum != c->ihead_lnum) { -+ /* -+ * After an unclean unmount, an index LEB could have a different -+ * amount of free space than the value recorded by lprops. That -+ * is because the in-the-gaps method may use free space or -+ * create free space (as a side-effect of using ubi_leb_change -+ * and not writing the whole LEB). The incorrect free space -+ * value is not a problem because the index is only ever -+ * allocated empty LEBs, so there will never be an attempt to -+ * write to the free space at the end of an index LEB - except -+ * by the in-the-gaps method for which it is not a problem. -+ */ -+ free = lp->free; -+ dirty = lp->dirty; -+ } -+ -+ if (lp->free != free || lp->dirty != dirty) -+ goto out_print; -+ -+ if (is_idx && !(lp->flags & LPROPS_INDEX)) { -+ if (free == c->leb_size) -+ /* Free but not unmapped LEB, it's fine */ -+ is_idx = 0; -+ else { -+ ubifs_err("indexing node without indexing " -+ "flag"); -+ goto out_print; -+ } -+ } -+ -+ if (!is_idx && (lp->flags & LPROPS_INDEX)) { -+ ubifs_err("data node with indexing flag"); -+ goto out_print; -+ } -+ -+ if (free == c->leb_size) -+ lst->empty_lebs += 1; -+ -+ if (is_idx) -+ lst->idx_lebs += 1; -+ -+ if (!(lp->flags & LPROPS_INDEX)) -+ lst->total_used += c->leb_size - free - dirty; -+ lst->total_free += free; -+ lst->total_dirty += dirty; -+ -+ if (!(lp->flags & LPROPS_INDEX)) { -+ int spc = free + dirty; -+ -+ if (spc < c->dead_wm) -+ lst->total_dead += spc; -+ else -+ lst->total_dark += calc_dark(c, spc); -+ } -+ -+ ubifs_scan_destroy(sleb); -+ return LPT_SCAN_CONTINUE; -+ -+out_print: -+ ubifs_err("bad accounting of LEB %d: free %d, dirty %d flags %#x, " -+ "should be free %d, dirty %d", -+ lnum, lp->free, lp->dirty, lp->flags, free, dirty); -+ dbg_dump_leb(c, lnum); -+out_destroy: -+ ubifs_scan_destroy(sleb); -+out: -+ 
data->err = -EINVAL; -+ return LPT_SCAN_STOP; -+} -+ -+/** -+ * dbg_check_lprops - check all LEB properties. -+ * @c: UBIFS file-system description object -+ * -+ * This function checks all LEB properties and makes sure they are all correct. -+ * It returns zero if everything is fine, %-EINVAL if there is an inconsistency -+ * and other negative error codes in case of other errors. This function is -+ * called while the file system is locked (because of commit start), so no -+ * additional locking is required. Note that locking the LPT mutex would cause -+ * a circular lock dependency with the TNC mutex. -+ */ -+int dbg_check_lprops(struct ubifs_info *c) -+{ -+ int i, err; -+ struct scan_check_data data; -+ struct ubifs_lp_stats *lst = &data.lst; -+ -+ if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) -+ return 0; -+ -+ /* -+ * As we are going to scan the media, the write buffers have to be -+ * synchronized. -+ */ -+ for (i = 0; i < c->jhead_cnt; i++) { -+ err = ubifs_wbuf_sync(&c->jheads[i].wbuf); -+ if (err) -+ return err; -+ } -+ -+ memset(lst, 0, sizeof(struct ubifs_lp_stats)); -+ -+ data.err = 0; -+ err = ubifs_lpt_scan_nolock(c, c->main_first, c->leb_cnt - 1, -+ (ubifs_lpt_scan_callback)scan_check_cb, -+ &data); -+ if (err && err != -ENOSPC) -+ goto out; -+ if (data.err) { -+ err = data.err; -+ goto out; -+ } -+ -+ if (lst->empty_lebs != c->lst.empty_lebs || -+ lst->idx_lebs != c->lst.idx_lebs || -+ lst->total_free != c->lst.total_free || -+ lst->total_dirty != c->lst.total_dirty || -+ lst->total_used != c->lst.total_used) { -+ ubifs_err("bad overall accounting"); -+ ubifs_err("calculated: empty_lebs %d, idx_lebs %d, " -+ "total_free %lld, total_dirty %lld, total_used %lld", -+ lst->empty_lebs, lst->idx_lebs, lst->total_free, -+ lst->total_dirty, lst->total_used); -+ ubifs_err("read from lprops: empty_lebs %d, idx_lebs %d, " -+ "total_free %lld, total_dirty %lld, total_used %lld", -+ c->lst.empty_lebs, c->lst.idx_lebs, c->lst.total_free, -+ c->lst.total_dirty, 
c->lst.total_used); -+ err = -EINVAL; -+ goto out; -+ } -+ -+ if (lst->total_dead != c->lst.total_dead || -+ lst->total_dark != c->lst.total_dark) { -+ ubifs_err("bad dead/dark space accounting"); -+ ubifs_err("calculated: total_dead %lld, total_dark %lld", -+ lst->total_dead, lst->total_dark); -+ ubifs_err("read from lprops: total_dead %lld, total_dark %lld", -+ c->lst.total_dead, c->lst.total_dark); -+ err = -EINVAL; -+ goto out; -+ } -+ -+ err = dbg_check_cats(c); -+out: -+ return err; -+} -+ -+#endif /* CONFIG_UBIFS_FS_DEBUG */ -diff -Nurd linux-2.6.24.orig/fs/ubifs/lpt.c linux-2.6.24/fs/ubifs/lpt.c ---- linux-2.6.24.orig/fs/ubifs/lpt.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.24/fs/ubifs/lpt.c 2009-04-17 09:49:28.000000000 +0200 -@@ -0,0 +1,2271 @@ -+/* -+ * This file is part of UBIFS. -+ * -+ * Copyright (C) 2006-2008 Nokia Corporation. -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 as published by -+ * the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -+ * more details. -+ * -+ * You should have received a copy of the GNU General Public License along with -+ * this program; if not, write to the Free Software Foundation, Inc., 51 -+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ * -+ * Authors: Adrian Hunter -+ * Artem Bityutskiy (Битюцкий Артём) -+ */ -+ -+/* -+ * This file implements the LEB properties tree (LPT) area. The LPT area -+ * contains the LEB properties tree, a table of LPT area eraseblocks (ltab), and -+ * (for the "big" model) a table of saved LEB numbers (lsave). The LPT area sits -+ * between the log and the orphan area. -+ * -+ * The LPT area is like a miniature self-contained file system. 
It is required -+ * that it never runs out of space, is fast to access and update, and scales -+ * logarithmically. The LEB properties tree is implemented as a wandering tree -+ * much like the TNC, and the LPT area has its own garbage collection. -+ * -+ * The LPT has two slightly different forms called the "small model" and the -+ * "big model". The small model is used when the entire LEB properties table -+ * can be written into a single eraseblock. In that case, garbage collection -+ * consists of just writing the whole table, which therefore makes all other -+ * eraseblocks reusable. In the case of the big model, dirty eraseblocks are -+ * selected for garbage collection, which consists of marking the clean nodes in -+ * that LEB as dirty, and then only the dirty nodes are written out. Also, in -+ * the case of the big model, a table of LEB numbers is saved so that the entire -+ * LPT does not to be scanned looking for empty eraseblocks when UBIFS is first -+ * mounted. -+ */ -+ -+#include "ubifs.h" -+#include <linux/crc16.h> -+ -+/** -+ * do_calc_lpt_geom - calculate sizes for the LPT area. -+ * @c: the UBIFS file-system description object -+ * -+ * Calculate the sizes of LPT bit fields, nodes, and tree, based on the -+ * properties of the flash and whether LPT is "big" (c->big_lpt). 
-+ */ -+static void do_calc_lpt_geom(struct ubifs_info *c) -+{ -+ int i, n, bits, per_leb_wastage, max_pnode_cnt; -+ long long sz, tot_wastage; -+ -+ n = c->main_lebs + c->max_leb_cnt - c->leb_cnt; -+ max_pnode_cnt = DIV_ROUND_UP(n, UBIFS_LPT_FANOUT); -+ -+ c->lpt_hght = 1; -+ n = UBIFS_LPT_FANOUT; -+ while (n < max_pnode_cnt) { -+ c->lpt_hght += 1; -+ n <<= UBIFS_LPT_FANOUT_SHIFT; -+ } -+ -+ c->pnode_cnt = DIV_ROUND_UP(c->main_lebs, UBIFS_LPT_FANOUT); -+ -+ n = DIV_ROUND_UP(c->pnode_cnt, UBIFS_LPT_FANOUT); -+ c->nnode_cnt = n; -+ for (i = 1; i < c->lpt_hght; i++) { -+ n = DIV_ROUND_UP(n, UBIFS_LPT_FANOUT); -+ c->nnode_cnt += n; -+ } -+ -+ c->space_bits = fls(c->leb_size) - 3; -+ c->lpt_lnum_bits = fls(c->lpt_lebs); -+ c->lpt_offs_bits = fls(c->leb_size - 1); -+ c->lpt_spc_bits = fls(c->leb_size); -+ -+ n = DIV_ROUND_UP(c->max_leb_cnt, UBIFS_LPT_FANOUT); -+ c->pcnt_bits = fls(n - 1); -+ -+ c->lnum_bits = fls(c->max_leb_cnt - 1); -+ -+ bits = UBIFS_LPT_CRC_BITS + UBIFS_LPT_TYPE_BITS + -+ (c->big_lpt ? c->pcnt_bits : 0) + -+ (c->space_bits * 2 + 1) * UBIFS_LPT_FANOUT; -+ c->pnode_sz = (bits + 7) / 8; -+ -+ bits = UBIFS_LPT_CRC_BITS + UBIFS_LPT_TYPE_BITS + -+ (c->big_lpt ? 
c->pcnt_bits : 0) + -+ (c->lpt_lnum_bits + c->lpt_offs_bits) * UBIFS_LPT_FANOUT; -+ c->nnode_sz = (bits + 7) / 8; -+ -+ bits = UBIFS_LPT_CRC_BITS + UBIFS_LPT_TYPE_BITS + -+ c->lpt_lebs * c->lpt_spc_bits * 2; -+ c->ltab_sz = (bits + 7) / 8; -+ -+ bits = UBIFS_LPT_CRC_BITS + UBIFS_LPT_TYPE_BITS + -+ c->lnum_bits * c->lsave_cnt; -+ c->lsave_sz = (bits + 7) / 8; -+ -+ /* Calculate the minimum LPT size */ -+ c->lpt_sz = (long long)c->pnode_cnt * c->pnode_sz; -+ c->lpt_sz += (long long)c->nnode_cnt * c->nnode_sz; -+ c->lpt_sz += c->ltab_sz; -+ if (c->big_lpt) -+ c->lpt_sz += c->lsave_sz; -+ -+ /* Add wastage */ -+ sz = c->lpt_sz; -+ per_leb_wastage = max_t(int, c->pnode_sz, c->nnode_sz); -+ sz += per_leb_wastage; -+ tot_wastage = per_leb_wastage; -+ while (sz > c->leb_size) { -+ sz += per_leb_wastage; -+ sz -= c->leb_size; -+ tot_wastage += per_leb_wastage; -+ } -+ tot_wastage += ALIGN(sz, c->min_io_size) - sz; -+ c->lpt_sz += tot_wastage; -+} -+ -+/** -+ * ubifs_calc_lpt_geom - calculate and check sizes for the LPT area. -+ * @c: the UBIFS file-system description object -+ * -+ * This function returns %0 on success and a negative error code on failure. -+ */ -+int ubifs_calc_lpt_geom(struct ubifs_info *c) -+{ -+ int lebs_needed; -+ long long sz; -+ -+ do_calc_lpt_geom(c); -+ -+ /* Verify that lpt_lebs is big enough */ -+ sz = c->lpt_sz * 2; /* Must have at least 2 times the size */ -+ lebs_needed = div_u64(sz + c->leb_size - 1, c->leb_size); -+ if (lebs_needed > c->lpt_lebs) { -+ ubifs_err("too few LPT LEBs"); -+ return -EINVAL; -+ } -+ -+ /* Verify that ltab fits in a single LEB (since ltab is a single node */ -+ if (c->ltab_sz > c->leb_size) { -+ ubifs_err("LPT ltab too big"); -+ return -EINVAL; -+ } -+ -+ c->check_lpt_free = c->big_lpt; -+ return 0; -+} -+ -+/** -+ * calc_dflt_lpt_geom - calculate default LPT geometry. 
-+ * @c: the UBIFS file-system description object -+ * @main_lebs: number of main area LEBs is passed and returned here -+ * @big_lpt: whether the LPT area is "big" is returned here -+ * -+ * The size of the LPT area depends on parameters that themselves are dependent -+ * on the size of the LPT area. This function, successively recalculates the LPT -+ * area geometry until the parameters and resultant geometry are consistent. -+ * -+ * This function returns %0 on success and a negative error code on failure. -+ */ -+static int calc_dflt_lpt_geom(struct ubifs_info *c, int *main_lebs, -+ int *big_lpt) -+{ -+ int i, lebs_needed; -+ long long sz; -+ -+ /* Start by assuming the minimum number of LPT LEBs */ -+ c->lpt_lebs = UBIFS_MIN_LPT_LEBS; -+ c->main_lebs = *main_lebs - c->lpt_lebs; -+ if (c->main_lebs <= 0) -+ return -EINVAL; -+ -+ /* And assume we will use the small LPT model */ -+ c->big_lpt = 0; -+ -+ /* -+ * Calculate the geometry based on assumptions above and then see if it -+ * makes sense -+ */ -+ do_calc_lpt_geom(c); -+ -+ /* Small LPT model must have lpt_sz < leb_size */ -+ if (c->lpt_sz > c->leb_size) { -+ /* Nope, so try again using big LPT model */ -+ c->big_lpt = 1; -+ do_calc_lpt_geom(c); -+ } -+ -+ /* Now check there are enough LPT LEBs */ -+ for (i = 0; i < 64 ; i++) { -+ sz = c->lpt_sz * 4; /* Allow 4 times the size */ -+ lebs_needed = div_u64(sz + c->leb_size - 1, c->leb_size); -+ if (lebs_needed > c->lpt_lebs) { -+ /* Not enough LPT LEBs so try again with more */ -+ c->lpt_lebs = lebs_needed; -+ c->main_lebs = *main_lebs - c->lpt_lebs; -+ if (c->main_lebs <= 0) -+ return -EINVAL; -+ do_calc_lpt_geom(c); -+ continue; -+ } -+ if (c->ltab_sz > c->leb_size) { -+ ubifs_err("LPT ltab too big"); -+ return -EINVAL; -+ } -+ *main_lebs = c->main_lebs; -+ *big_lpt = c->big_lpt; -+ return 0; -+ } -+ return -EINVAL; -+} -+ -+/** -+ * pack_bits - pack bit fields end-to-end. 
-+ * @addr: address at which to pack (passed and next address returned) -+ * @pos: bit position at which to pack (passed and next position returned) -+ * @val: value to pack -+ * @nrbits: number of bits of value to pack (1-32) -+ */ -+static void pack_bits(uint8_t **addr, int *pos, uint32_t val, int nrbits) -+{ -+ uint8_t *p = *addr; -+ int b = *pos; -+ -+ ubifs_assert(nrbits > 0); -+ ubifs_assert(nrbits <= 32); -+ ubifs_assert(*pos >= 0); -+ ubifs_assert(*pos < 8); -+ ubifs_assert((val >> nrbits) == 0 || nrbits == 32); -+ if (b) { -+ *p |= ((uint8_t)val) << b; -+ nrbits += b; -+ if (nrbits > 8) { -+ *++p = (uint8_t)(val >>= (8 - b)); -+ if (nrbits > 16) { -+ *++p = (uint8_t)(val >>= 8); -+ if (nrbits > 24) { -+ *++p = (uint8_t)(val >>= 8); -+ if (nrbits > 32) -+ *++p = (uint8_t)(val >>= 8); -+ } -+ } -+ } -+ } else { -+ *p = (uint8_t)val; -+ if (nrbits > 8) { -+ *++p = (uint8_t)(val >>= 8); -+ if (nrbits > 16) { -+ *++p = (uint8_t)(val >>= 8); -+ if (nrbits > 24) -+ *++p = (uint8_t)(val >>= 8); -+ } -+ } -+ } -+ b = nrbits & 7; -+ if (b == 0) -+ p++; -+ *addr = p; -+ *pos = b; -+} -+ -+/** -+ * ubifs_unpack_bits - unpack bit fields. -+ * @addr: address at which to unpack (passed and next address returned) -+ * @pos: bit position at which to unpack (passed and next position returned) -+ * @nrbits: number of bits of value to unpack (1-32) -+ * -+ * This functions returns the value unpacked. 
-+ */ -+uint32_t ubifs_unpack_bits(uint8_t **addr, int *pos, int nrbits) -+{ -+ const int k = 32 - nrbits; -+ uint8_t *p = *addr; -+ int b = *pos; -+ uint32_t uninitialized_var(val); -+ const int bytes = (nrbits + b + 7) >> 3; -+ -+ ubifs_assert(nrbits > 0); -+ ubifs_assert(nrbits <= 32); -+ ubifs_assert(*pos >= 0); -+ ubifs_assert(*pos < 8); -+ if (b) { -+ switch (bytes) { -+ case 2: -+ val = p[1]; -+ break; -+ case 3: -+ val = p[1] | ((uint32_t)p[2] << 8); -+ break; -+ case 4: -+ val = p[1] | ((uint32_t)p[2] << 8) | -+ ((uint32_t)p[3] << 16); -+ break; -+ case 5: -+ val = p[1] | ((uint32_t)p[2] << 8) | -+ ((uint32_t)p[3] << 16) | -+ ((uint32_t)p[4] << 24); -+ } -+ val <<= (8 - b); -+ val |= *p >> b; -+ nrbits += b; -+ } else { -+ switch (bytes) { -+ case 1: -+ val = p[0]; -+ break; -+ case 2: -+ val = p[0] | ((uint32_t)p[1] << 8); -+ break; -+ case 3: -+ val = p[0] | ((uint32_t)p[1] << 8) | -+ ((uint32_t)p[2] << 16); -+ break; -+ case 4: -+ val = p[0] | ((uint32_t)p[1] << 8) | -+ ((uint32_t)p[2] << 16) | -+ ((uint32_t)p[3] << 24); -+ break; -+ } -+ } -+ val <<= k; -+ val >>= k; -+ b = nrbits & 7; -+ p += nrbits >> 3; -+ *addr = p; -+ *pos = b; -+ ubifs_assert((val >> nrbits) == 0 || nrbits - b == 32); -+ return val; -+} -+ -+/** -+ * ubifs_pack_pnode - pack all the bit fields of a pnode. 
-+ * @c: UBIFS file-system description object -+ * @buf: buffer into which to pack -+ * @pnode: pnode to pack -+ */ -+void ubifs_pack_pnode(struct ubifs_info *c, void *buf, -+ struct ubifs_pnode *pnode) -+{ -+ uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; -+ int i, pos = 0; -+ uint16_t crc; -+ -+ pack_bits(&addr, &pos, UBIFS_LPT_PNODE, UBIFS_LPT_TYPE_BITS); -+ if (c->big_lpt) -+ pack_bits(&addr, &pos, pnode->num, c->pcnt_bits); -+ for (i = 0; i < UBIFS_LPT_FANOUT; i++) { -+ pack_bits(&addr, &pos, pnode->lprops[i].free >> 3, -+ c->space_bits); -+ pack_bits(&addr, &pos, pnode->lprops[i].dirty >> 3, -+ c->space_bits); -+ if (pnode->lprops[i].flags & LPROPS_INDEX) -+ pack_bits(&addr, &pos, 1, 1); -+ else -+ pack_bits(&addr, &pos, 0, 1); -+ } -+ crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES, -+ c->pnode_sz - UBIFS_LPT_CRC_BYTES); -+ addr = buf; -+ pos = 0; -+ pack_bits(&addr, &pos, crc, UBIFS_LPT_CRC_BITS); -+} -+ -+/** -+ * ubifs_pack_nnode - pack all the bit fields of a nnode. -+ * @c: UBIFS file-system description object -+ * @buf: buffer into which to pack -+ * @nnode: nnode to pack -+ */ -+void ubifs_pack_nnode(struct ubifs_info *c, void *buf, -+ struct ubifs_nnode *nnode) -+{ -+ uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; -+ int i, pos = 0; -+ uint16_t crc; -+ -+ pack_bits(&addr, &pos, UBIFS_LPT_NNODE, UBIFS_LPT_TYPE_BITS); -+ if (c->big_lpt) -+ pack_bits(&addr, &pos, nnode->num, c->pcnt_bits); -+ for (i = 0; i < UBIFS_LPT_FANOUT; i++) { -+ int lnum = nnode->nbranch[i].lnum; -+ -+ if (lnum == 0) -+ lnum = c->lpt_last + 1; -+ pack_bits(&addr, &pos, lnum - c->lpt_first, c->lpt_lnum_bits); -+ pack_bits(&addr, &pos, nnode->nbranch[i].offs, -+ c->lpt_offs_bits); -+ } -+ crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES, -+ c->nnode_sz - UBIFS_LPT_CRC_BYTES); -+ addr = buf; -+ pos = 0; -+ pack_bits(&addr, &pos, crc, UBIFS_LPT_CRC_BITS); -+} -+ -+/** -+ * ubifs_pack_ltab - pack the LPT's own lprops table. 
-+ * @c: UBIFS file-system description object -+ * @buf: buffer into which to pack -+ * @ltab: LPT's own lprops table to pack -+ */ -+void ubifs_pack_ltab(struct ubifs_info *c, void *buf, -+ struct ubifs_lpt_lprops *ltab) -+{ -+ uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; -+ int i, pos = 0; -+ uint16_t crc; -+ -+ pack_bits(&addr, &pos, UBIFS_LPT_LTAB, UBIFS_LPT_TYPE_BITS); -+ for (i = 0; i < c->lpt_lebs; i++) { -+ pack_bits(&addr, &pos, ltab[i].free, c->lpt_spc_bits); -+ pack_bits(&addr, &pos, ltab[i].dirty, c->lpt_spc_bits); -+ } -+ crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES, -+ c->ltab_sz - UBIFS_LPT_CRC_BYTES); -+ addr = buf; -+ pos = 0; -+ pack_bits(&addr, &pos, crc, UBIFS_LPT_CRC_BITS); -+} -+ -+/** -+ * ubifs_pack_lsave - pack the LPT's save table. -+ * @c: UBIFS file-system description object -+ * @buf: buffer into which to pack -+ * @lsave: LPT's save table to pack -+ */ -+void ubifs_pack_lsave(struct ubifs_info *c, void *buf, int *lsave) -+{ -+ uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; -+ int i, pos = 0; -+ uint16_t crc; -+ -+ pack_bits(&addr, &pos, UBIFS_LPT_LSAVE, UBIFS_LPT_TYPE_BITS); -+ for (i = 0; i < c->lsave_cnt; i++) -+ pack_bits(&addr, &pos, lsave[i], c->lnum_bits); -+ crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES, -+ c->lsave_sz - UBIFS_LPT_CRC_BYTES); -+ addr = buf; -+ pos = 0; -+ pack_bits(&addr, &pos, crc, UBIFS_LPT_CRC_BITS); -+} -+ -+/** -+ * ubifs_add_lpt_dirt - add dirty space to LPT LEB properties. -+ * @c: UBIFS file-system description object -+ * @lnum: LEB number to which to add dirty space -+ * @dirty: amount of dirty space to add -+ */ -+void ubifs_add_lpt_dirt(struct ubifs_info *c, int lnum, int dirty) -+{ -+ if (!dirty || !lnum) -+ return; -+ dbg_lp("LEB %d add %d to %d", -+ lnum, dirty, c->ltab[lnum - c->lpt_first].dirty); -+ ubifs_assert(lnum >= c->lpt_first && lnum <= c->lpt_last); -+ c->ltab[lnum - c->lpt_first].dirty += dirty; -+} -+ -+/** -+ * set_ltab - set LPT LEB properties. 
-+ * @c: UBIFS file-system description object -+ * @lnum: LEB number -+ * @free: amount of free space -+ * @dirty: amount of dirty space -+ */ -+static void set_ltab(struct ubifs_info *c, int lnum, int free, int dirty) -+{ -+ dbg_lp("LEB %d free %d dirty %d to %d %d", -+ lnum, c->ltab[lnum - c->lpt_first].free, -+ c->ltab[lnum - c->lpt_first].dirty, free, dirty); -+ ubifs_assert(lnum >= c->lpt_first && lnum <= c->lpt_last); -+ c->ltab[lnum - c->lpt_first].free = free; -+ c->ltab[lnum - c->lpt_first].dirty = dirty; -+} -+ -+/** -+ * ubifs_add_nnode_dirt - add dirty space to LPT LEB properties. -+ * @c: UBIFS file-system description object -+ * @nnode: nnode for which to add dirt -+ */ -+void ubifs_add_nnode_dirt(struct ubifs_info *c, struct ubifs_nnode *nnode) -+{ -+ struct ubifs_nnode *np = nnode->parent; -+ -+ if (np) -+ ubifs_add_lpt_dirt(c, np->nbranch[nnode->iip].lnum, -+ c->nnode_sz); -+ else { -+ ubifs_add_lpt_dirt(c, c->lpt_lnum, c->nnode_sz); -+ if (!(c->lpt_drty_flgs & LTAB_DIRTY)) { -+ c->lpt_drty_flgs |= LTAB_DIRTY; -+ ubifs_add_lpt_dirt(c, c->ltab_lnum, c->ltab_sz); -+ } -+ } -+} -+ -+/** -+ * add_pnode_dirt - add dirty space to LPT LEB properties. -+ * @c: UBIFS file-system description object -+ * @pnode: pnode for which to add dirt -+ */ -+static void add_pnode_dirt(struct ubifs_info *c, struct ubifs_pnode *pnode) -+{ -+ ubifs_add_lpt_dirt(c, pnode->parent->nbranch[pnode->iip].lnum, -+ c->pnode_sz); -+} -+ -+/** -+ * calc_nnode_num - calculate nnode number. -+ * @row: the row in the tree (root is zero) -+ * @col: the column in the row (leftmost is zero) -+ * -+ * The nnode number is a number that uniquely identifies a nnode and can be used -+ * easily to traverse the tree from the root to that nnode. -+ * -+ * This function calculates and returns the nnode number for the nnode at @row -+ * and @col. 
-+ */ -+static int calc_nnode_num(int row, int col) -+{ -+ int num, bits; -+ -+ num = 1; -+ while (row--) { -+ bits = (col & (UBIFS_LPT_FANOUT - 1)); -+ col >>= UBIFS_LPT_FANOUT_SHIFT; -+ num <<= UBIFS_LPT_FANOUT_SHIFT; -+ num |= bits; -+ } -+ return num; -+} -+ -+/** -+ * calc_nnode_num_from_parent - calculate nnode number. -+ * @c: UBIFS file-system description object -+ * @parent: parent nnode -+ * @iip: index in parent -+ * -+ * The nnode number is a number that uniquely identifies a nnode and can be used -+ * easily to traverse the tree from the root to that nnode. -+ * -+ * This function calculates and returns the nnode number based on the parent's -+ * nnode number and the index in parent. -+ */ -+static int calc_nnode_num_from_parent(const struct ubifs_info *c, -+ struct ubifs_nnode *parent, int iip) -+{ -+ int num, shft; -+ -+ if (!parent) -+ return 1; -+ shft = (c->lpt_hght - parent->level) * UBIFS_LPT_FANOUT_SHIFT; -+ num = parent->num ^ (1 << shft); -+ num |= (UBIFS_LPT_FANOUT + iip) << shft; -+ return num; -+} -+ -+/** -+ * calc_pnode_num_from_parent - calculate pnode number. -+ * @c: UBIFS file-system description object -+ * @parent: parent nnode -+ * @iip: index in parent -+ * -+ * The pnode number is a number that uniquely identifies a pnode and can be used -+ * easily to traverse the tree from the root to that pnode. -+ * -+ * This function calculates and returns the pnode number based on the parent's -+ * nnode number and the index in parent. -+ */ -+static int calc_pnode_num_from_parent(const struct ubifs_info *c, -+ struct ubifs_nnode *parent, int iip) -+{ -+ int i, n = c->lpt_hght - 1, pnum = parent->num, num = 0; -+ -+ for (i = 0; i < n; i++) { -+ num <<= UBIFS_LPT_FANOUT_SHIFT; -+ num |= pnum & (UBIFS_LPT_FANOUT - 1); -+ pnum >>= UBIFS_LPT_FANOUT_SHIFT; -+ } -+ num <<= UBIFS_LPT_FANOUT_SHIFT; -+ num |= iip; -+ return num; -+} -+ -+/** -+ * ubifs_create_dflt_lpt - create default LPT. 
-+ * @c: UBIFS file-system description object -+ * @main_lebs: number of main area LEBs is passed and returned here -+ * @lpt_first: LEB number of first LPT LEB -+ * @lpt_lebs: number of LEBs for LPT is passed and returned here -+ * @big_lpt: use big LPT model is passed and returned here -+ * -+ * This function returns %0 on success and a negative error code on failure. -+ */ -+int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, -+ int *lpt_lebs, int *big_lpt) -+{ -+ int lnum, err = 0, node_sz, iopos, i, j, cnt, len, alen, row; -+ int blnum, boffs, bsz, bcnt; -+ struct ubifs_pnode *pnode = NULL; -+ struct ubifs_nnode *nnode = NULL; -+ void *buf = NULL, *p; -+ struct ubifs_lpt_lprops *ltab = NULL; -+ int *lsave = NULL; -+ -+ err = calc_dflt_lpt_geom(c, main_lebs, big_lpt); -+ if (err) -+ return err; -+ *lpt_lebs = c->lpt_lebs; -+ -+ /* Needed by 'ubifs_pack_nnode()' and 'set_ltab()' */ -+ c->lpt_first = lpt_first; -+ /* Needed by 'set_ltab()' */ -+ c->lpt_last = lpt_first + c->lpt_lebs - 1; -+ /* Needed by 'ubifs_pack_lsave()' */ -+ c->main_first = c->leb_cnt - *main_lebs; -+ -+ lsave = kmalloc(sizeof(int) * c->lsave_cnt, GFP_KERNEL); -+ pnode = kzalloc(sizeof(struct ubifs_pnode), GFP_KERNEL); -+ nnode = kzalloc(sizeof(struct ubifs_nnode), GFP_KERNEL); -+ buf = vmalloc(c->leb_size); -+ ltab = vmalloc(sizeof(struct ubifs_lpt_lprops) * c->lpt_lebs); -+ if (!pnode || !nnode || !buf || !ltab || !lsave) { -+ err = -ENOMEM; -+ goto out; -+ } -+ -+ ubifs_assert(!c->ltab); -+ c->ltab = ltab; /* Needed by set_ltab */ -+ -+ /* Initialize LPT's own lprops */ -+ for (i = 0; i < c->lpt_lebs; i++) { -+ ltab[i].free = c->leb_size; -+ ltab[i].dirty = 0; -+ ltab[i].tgc = 0; -+ ltab[i].cmt = 0; -+ } -+ -+ lnum = lpt_first; -+ p = buf; -+ /* Number of leaf nodes (pnodes) */ -+ cnt = c->pnode_cnt; -+ -+ /* -+ * The first pnode contains the LEB properties for the LEBs that contain -+ * the root inode node and the root index node of the index tree. 
-+ */ -+ node_sz = ALIGN(ubifs_idx_node_sz(c, 1), 8); -+ iopos = ALIGN(node_sz, c->min_io_size); -+ pnode->lprops[0].free = c->leb_size - iopos; -+ pnode->lprops[0].dirty = iopos - node_sz; -+ pnode->lprops[0].flags = LPROPS_INDEX; -+ -+ node_sz = UBIFS_INO_NODE_SZ; -+ iopos = ALIGN(node_sz, c->min_io_size); -+ pnode->lprops[1].free = c->leb_size - iopos; -+ pnode->lprops[1].dirty = iopos - node_sz; -+ -+ for (i = 2; i < UBIFS_LPT_FANOUT; i++) -+ pnode->lprops[i].free = c->leb_size; -+ -+ /* Add first pnode */ -+ ubifs_pack_pnode(c, p, pnode); -+ p += c->pnode_sz; -+ len = c->pnode_sz; -+ pnode->num += 1; -+ -+ /* Reset pnode values for remaining pnodes */ -+ pnode->lprops[0].free = c->leb_size; -+ pnode->lprops[0].dirty = 0; -+ pnode->lprops[0].flags = 0; -+ -+ pnode->lprops[1].free = c->leb_size; -+ pnode->lprops[1].dirty = 0; -+ -+ /* -+ * To calculate the internal node branches, we keep information about -+ * the level below. -+ */ -+ blnum = lnum; /* LEB number of level below */ -+ boffs = 0; /* Offset of level below */ -+ bcnt = cnt; /* Number of nodes in level below */ -+ bsz = c->pnode_sz; /* Size of nodes in level below */ -+ -+ /* Add all remaining pnodes */ -+ for (i = 1; i < cnt; i++) { -+ if (len + c->pnode_sz > c->leb_size) { -+ alen = ALIGN(len, c->min_io_size); -+ set_ltab(c, lnum, c->leb_size - alen, alen - len); -+ memset(p, 0xff, alen - len); -+ err = ubi_leb_change(c->ubi, lnum++, buf, alen, -+ UBI_SHORTTERM); -+ if (err) -+ goto out; -+ p = buf; -+ len = 0; -+ } -+ ubifs_pack_pnode(c, p, pnode); -+ p += c->pnode_sz; -+ len += c->pnode_sz; -+ /* -+ * pnodes are simply numbered left to right starting at zero, -+ * which means the pnode number can be used easily to traverse -+ * down the tree to the corresponding pnode. 
-+ */ -+ pnode->num += 1; -+ } -+ -+ row = 0; -+ for (i = UBIFS_LPT_FANOUT; cnt > i; i <<= UBIFS_LPT_FANOUT_SHIFT) -+ row += 1; -+ /* Add all nnodes, one level at a time */ -+ while (1) { -+ /* Number of internal nodes (nnodes) at next level */ -+ cnt = DIV_ROUND_UP(cnt, UBIFS_LPT_FANOUT); -+ for (i = 0; i < cnt; i++) { -+ if (len + c->nnode_sz > c->leb_size) { -+ alen = ALIGN(len, c->min_io_size); -+ set_ltab(c, lnum, c->leb_size - alen, -+ alen - len); -+ memset(p, 0xff, alen - len); -+ err = ubi_leb_change(c->ubi, lnum++, buf, alen, -+ UBI_SHORTTERM); -+ if (err) -+ goto out; -+ p = buf; -+ len = 0; -+ } -+ /* Only 1 nnode at this level, so it is the root */ -+ if (cnt == 1) { -+ c->lpt_lnum = lnum; -+ c->lpt_offs = len; -+ } -+ /* Set branches to the level below */ -+ for (j = 0; j < UBIFS_LPT_FANOUT; j++) { -+ if (bcnt) { -+ if (boffs + bsz > c->leb_size) { -+ blnum += 1; -+ boffs = 0; -+ } -+ nnode->nbranch[j].lnum = blnum; -+ nnode->nbranch[j].offs = boffs; -+ boffs += bsz; -+ bcnt--; -+ } else { -+ nnode->nbranch[j].lnum = 0; -+ nnode->nbranch[j].offs = 0; -+ } -+ } -+ nnode->num = calc_nnode_num(row, i); -+ ubifs_pack_nnode(c, p, nnode); -+ p += c->nnode_sz; -+ len += c->nnode_sz; -+ } -+ /* Only 1 nnode at this level, so it is the root */ -+ if (cnt == 1) -+ break; -+ /* Update the information about the level below */ -+ bcnt = cnt; -+ bsz = c->nnode_sz; -+ row -= 1; -+ } -+ -+ if (*big_lpt) { -+ /* Need to add LPT's save table */ -+ if (len + c->lsave_sz > c->leb_size) { -+ alen = ALIGN(len, c->min_io_size); -+ set_ltab(c, lnum, c->leb_size - alen, alen - len); -+ memset(p, 0xff, alen - len); -+ err = ubi_leb_change(c->ubi, lnum++, buf, alen, -+ UBI_SHORTTERM); -+ if (err) -+ goto out; -+ p = buf; -+ len = 0; -+ } -+ -+ c->lsave_lnum = lnum; -+ c->lsave_offs = len; -+ -+ for (i = 0; i < c->lsave_cnt && i < *main_lebs; i++) -+ lsave[i] = c->main_first + i; -+ for (; i < c->lsave_cnt; i++) -+ lsave[i] = c->main_first; -+ -+ ubifs_pack_lsave(c, p, lsave); 
-+ p += c->lsave_sz; -+ len += c->lsave_sz; -+ } -+ -+ /* Need to add LPT's own LEB properties table */ -+ if (len + c->ltab_sz > c->leb_size) { -+ alen = ALIGN(len, c->min_io_size); -+ set_ltab(c, lnum, c->leb_size - alen, alen - len); -+ memset(p, 0xff, alen - len); -+ err = ubi_leb_change(c->ubi, lnum++, buf, alen, UBI_SHORTTERM); -+ if (err) -+ goto out; -+ p = buf; -+ len = 0; -+ } -+ -+ c->ltab_lnum = lnum; -+ c->ltab_offs = len; -+ -+ /* Update ltab before packing it */ -+ len += c->ltab_sz; -+ alen = ALIGN(len, c->min_io_size); -+ set_ltab(c, lnum, c->leb_size - alen, alen - len); -+ -+ ubifs_pack_ltab(c, p, ltab); -+ p += c->ltab_sz; -+ -+ /* Write remaining buffer */ -+ memset(p, 0xff, alen - len); -+ err = ubi_leb_change(c->ubi, lnum, buf, alen, UBI_SHORTTERM); -+ if (err) -+ goto out; -+ -+ c->nhead_lnum = lnum; -+ c->nhead_offs = ALIGN(len, c->min_io_size); -+ -+ dbg_lp("space_bits %d", c->space_bits); -+ dbg_lp("lpt_lnum_bits %d", c->lpt_lnum_bits); -+ dbg_lp("lpt_offs_bits %d", c->lpt_offs_bits); -+ dbg_lp("lpt_spc_bits %d", c->lpt_spc_bits); -+ dbg_lp("pcnt_bits %d", c->pcnt_bits); -+ dbg_lp("lnum_bits %d", c->lnum_bits); -+ dbg_lp("pnode_sz %d", c->pnode_sz); -+ dbg_lp("nnode_sz %d", c->nnode_sz); -+ dbg_lp("ltab_sz %d", c->ltab_sz); -+ dbg_lp("lsave_sz %d", c->lsave_sz); -+ dbg_lp("lsave_cnt %d", c->lsave_cnt); -+ dbg_lp("lpt_hght %d", c->lpt_hght); -+ dbg_lp("big_lpt %d", c->big_lpt); -+ dbg_lp("LPT root is at %d:%d", c->lpt_lnum, c->lpt_offs); -+ dbg_lp("LPT head is at %d:%d", c->nhead_lnum, c->nhead_offs); -+ dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs); -+ if (c->big_lpt) -+ dbg_lp("LPT lsave is at %d:%d", c->lsave_lnum, c->lsave_offs); -+out: -+ c->ltab = NULL; -+ kfree(lsave); -+ vfree(ltab); -+ vfree(buf); -+ kfree(nnode); -+ kfree(pnode); -+ return err; -+} -+ -+/** -+ * update_cats - add LEB properties of a pnode to LEB category lists and heaps. 
-+ * @c: UBIFS file-system description object -+ * @pnode: pnode -+ * -+ * When a pnode is loaded into memory, the LEB properties it contains are added, -+ * by this function, to the LEB category lists and heaps. -+ */ -+static void update_cats(struct ubifs_info *c, struct ubifs_pnode *pnode) -+{ -+ int i; -+ -+ for (i = 0; i < UBIFS_LPT_FANOUT; i++) { -+ int cat = pnode->lprops[i].flags & LPROPS_CAT_MASK; -+ int lnum = pnode->lprops[i].lnum; -+ -+ if (!lnum) -+ return; -+ ubifs_add_to_cat(c, &pnode->lprops[i], cat); -+ } -+} -+ -+/** -+ * replace_cats - add LEB properties of a pnode to LEB category lists and heaps. -+ * @c: UBIFS file-system description object -+ * @old_pnode: pnode copied -+ * @new_pnode: pnode copy -+ * -+ * During commit it is sometimes necessary to copy a pnode -+ * (see dirty_cow_pnode). When that happens, references in -+ * category lists and heaps must be replaced. This function does that. -+ */ -+static void replace_cats(struct ubifs_info *c, struct ubifs_pnode *old_pnode, -+ struct ubifs_pnode *new_pnode) -+{ -+ int i; -+ -+ for (i = 0; i < UBIFS_LPT_FANOUT; i++) { -+ if (!new_pnode->lprops[i].lnum) -+ return; -+ ubifs_replace_cat(c, &old_pnode->lprops[i], -+ &new_pnode->lprops[i]); -+ } -+} -+ -+/** -+ * check_lpt_crc - check LPT node crc is correct. -+ * @c: UBIFS file-system description object -+ * @buf: buffer containing node -+ * @len: length of node -+ * -+ * This function returns %0 on success and a negative error code on failure. -+ */ -+static int check_lpt_crc(void *buf, int len) -+{ -+ int pos = 0; -+ uint8_t *addr = buf; -+ uint16_t crc, calc_crc; -+ -+ crc = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_CRC_BITS); -+ calc_crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES, -+ len - UBIFS_LPT_CRC_BYTES); -+ if (crc != calc_crc) { -+ ubifs_err("invalid crc in LPT node: crc %hx calc %hx", crc, -+ calc_crc); -+ dbg_dump_stack(); -+ return -EINVAL; -+ } -+ return 0; -+} -+ -+/** -+ * check_lpt_type - check LPT node type is correct. 
-+ * @c: UBIFS file-system description object -+ * @addr: address of type bit field is passed and returned updated here -+ * @pos: position of type bit field is passed and returned updated here -+ * @type: expected type -+ * -+ * This function returns %0 on success and a negative error code on failure. -+ */ -+static int check_lpt_type(uint8_t **addr, int *pos, int type) -+{ -+ int node_type; -+ -+ node_type = ubifs_unpack_bits(addr, pos, UBIFS_LPT_TYPE_BITS); -+ if (node_type != type) { -+ ubifs_err("invalid type (%d) in LPT node type %d", node_type, -+ type); -+ dbg_dump_stack(); -+ return -EINVAL; -+ } -+ return 0; -+} -+ -+/** -+ * unpack_pnode - unpack a pnode. -+ * @c: UBIFS file-system description object -+ * @buf: buffer containing packed pnode to unpack -+ * @pnode: pnode structure to fill -+ * -+ * This function returns %0 on success and a negative error code on failure. -+ */ -+static int unpack_pnode(const struct ubifs_info *c, void *buf, -+ struct ubifs_pnode *pnode) -+{ -+ uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; -+ int i, pos = 0, err; -+ -+ err = check_lpt_type(&addr, &pos, UBIFS_LPT_PNODE); -+ if (err) -+ return err; -+ if (c->big_lpt) -+ pnode->num = ubifs_unpack_bits(&addr, &pos, c->pcnt_bits); -+ for (i = 0; i < UBIFS_LPT_FANOUT; i++) { -+ struct ubifs_lprops * const lprops = &pnode->lprops[i]; -+ -+ lprops->free = ubifs_unpack_bits(&addr, &pos, c->space_bits); -+ lprops->free <<= 3; -+ lprops->dirty = ubifs_unpack_bits(&addr, &pos, c->space_bits); -+ lprops->dirty <<= 3; -+ -+ if (ubifs_unpack_bits(&addr, &pos, 1)) -+ lprops->flags = LPROPS_INDEX; -+ else -+ lprops->flags = 0; -+ lprops->flags |= ubifs_categorize_lprops(c, lprops); -+ } -+ err = check_lpt_crc(buf, c->pnode_sz); -+ return err; -+} -+ -+/** -+ * ubifs_unpack_nnode - unpack a nnode. 
-+ * @c: UBIFS file-system description object -+ * @buf: buffer containing packed nnode to unpack -+ * @nnode: nnode structure to fill -+ * -+ * This function returns %0 on success and a negative error code on failure. -+ */ -+int ubifs_unpack_nnode(const struct ubifs_info *c, void *buf, -+ struct ubifs_nnode *nnode) -+{ -+ uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; -+ int i, pos = 0, err; -+ -+ err = check_lpt_type(&addr, &pos, UBIFS_LPT_NNODE); -+ if (err) -+ return err; -+ if (c->big_lpt) -+ nnode->num = ubifs_unpack_bits(&addr, &pos, c->pcnt_bits); -+ for (i = 0; i < UBIFS_LPT_FANOUT; i++) { -+ int lnum; -+ -+ lnum = ubifs_unpack_bits(&addr, &pos, c->lpt_lnum_bits) + -+ c->lpt_first; -+ if (lnum == c->lpt_last + 1) -+ lnum = 0; -+ nnode->nbranch[i].lnum = lnum; -+ nnode->nbranch[i].offs = ubifs_unpack_bits(&addr, &pos, -+ c->lpt_offs_bits); -+ } -+ err = check_lpt_crc(buf, c->nnode_sz); -+ return err; -+} -+ -+/** -+ * unpack_ltab - unpack the LPT's own lprops table. -+ * @c: UBIFS file-system description object -+ * @buf: buffer from which to unpack -+ * -+ * This function returns %0 on success and a negative error code on failure. -+ */ -+static int unpack_ltab(const struct ubifs_info *c, void *buf) -+{ -+ uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; -+ int i, pos = 0, err; -+ -+ err = check_lpt_type(&addr, &pos, UBIFS_LPT_LTAB); -+ if (err) -+ return err; -+ for (i = 0; i < c->lpt_lebs; i++) { -+ int free = ubifs_unpack_bits(&addr, &pos, c->lpt_spc_bits); -+ int dirty = ubifs_unpack_bits(&addr, &pos, c->lpt_spc_bits); -+ -+ if (free < 0 || free > c->leb_size || dirty < 0 || -+ dirty > c->leb_size || free + dirty > c->leb_size) -+ return -EINVAL; -+ -+ c->ltab[i].free = free; -+ c->ltab[i].dirty = dirty; -+ c->ltab[i].tgc = 0; -+ c->ltab[i].cmt = 0; -+ } -+ err = check_lpt_crc(buf, c->ltab_sz); -+ return err; -+} -+ -+/** -+ * unpack_lsave - unpack the LPT's save table. 
-+ * @c: UBIFS file-system description object -+ * @buf: buffer from which to unpack -+ * -+ * This function returns %0 on success and a negative error code on failure. -+ */ -+static int unpack_lsave(const struct ubifs_info *c, void *buf) -+{ -+ uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; -+ int i, pos = 0, err; -+ -+ err = check_lpt_type(&addr, &pos, UBIFS_LPT_LSAVE); -+ if (err) -+ return err; -+ for (i = 0; i < c->lsave_cnt; i++) { -+ int lnum = ubifs_unpack_bits(&addr, &pos, c->lnum_bits); -+ -+ if (lnum < c->main_first || lnum >= c->leb_cnt) -+ return -EINVAL; -+ c->lsave[i] = lnum; -+ } -+ err = check_lpt_crc(buf, c->lsave_sz); -+ return err; -+} -+ -+/** -+ * validate_nnode - validate a nnode. -+ * @c: UBIFS file-system description object -+ * @nnode: nnode to validate -+ * @parent: parent nnode (or NULL for the root nnode) -+ * @iip: index in parent -+ * -+ * This function returns %0 on success and a negative error code on failure. -+ */ -+static int validate_nnode(const struct ubifs_info *c, struct ubifs_nnode *nnode, -+ struct ubifs_nnode *parent, int iip) -+{ -+ int i, lvl, max_offs; -+ -+ if (c->big_lpt) { -+ int num = calc_nnode_num_from_parent(c, parent, iip); -+ -+ if (nnode->num != num) -+ return -EINVAL; -+ } -+ lvl = parent ? parent->level - 1 : c->lpt_hght; -+ if (lvl < 1) -+ return -EINVAL; -+ if (lvl == 1) -+ max_offs = c->leb_size - c->pnode_sz; -+ else -+ max_offs = c->leb_size - c->nnode_sz; -+ for (i = 0; i < UBIFS_LPT_FANOUT; i++) { -+ int lnum = nnode->nbranch[i].lnum; -+ int offs = nnode->nbranch[i].offs; -+ -+ if (lnum == 0) { -+ if (offs != 0) -+ return -EINVAL; -+ continue; -+ } -+ if (lnum < c->lpt_first || lnum > c->lpt_last) -+ return -EINVAL; -+ if (offs < 0 || offs > max_offs) -+ return -EINVAL; -+ } -+ return 0; -+} -+ -+/** -+ * validate_pnode - validate a pnode. 
-+ * @c: UBIFS file-system description object -+ * @pnode: pnode to validate -+ * @parent: parent nnode -+ * @iip: index in parent -+ * -+ * This function returns %0 on success and a negative error code on failure. -+ */ -+static int validate_pnode(const struct ubifs_info *c, struct ubifs_pnode *pnode, -+ struct ubifs_nnode *parent, int iip) -+{ -+ int i; -+ -+ if (c->big_lpt) { -+ int num = calc_pnode_num_from_parent(c, parent, iip); -+ -+ if (pnode->num != num) -+ return -EINVAL; -+ } -+ for (i = 0; i < UBIFS_LPT_FANOUT; i++) { -+ int free = pnode->lprops[i].free; -+ int dirty = pnode->lprops[i].dirty; -+ -+ if (free < 0 || free > c->leb_size || free % c->min_io_size || -+ (free & 7)) -+ return -EINVAL; -+ if (dirty < 0 || dirty > c->leb_size || (dirty & 7)) -+ return -EINVAL; -+ if (dirty + free > c->leb_size) -+ return -EINVAL; -+ } -+ return 0; -+} -+ -+/** -+ * set_pnode_lnum - set LEB numbers on a pnode. -+ * @c: UBIFS file-system description object -+ * @pnode: pnode to update -+ * -+ * This function calculates the LEB numbers for the LEB properties it contains -+ * based on the pnode number. -+ */ -+static void set_pnode_lnum(const struct ubifs_info *c, -+ struct ubifs_pnode *pnode) -+{ -+ int i, lnum; -+ -+ lnum = (pnode->num << UBIFS_LPT_FANOUT_SHIFT) + c->main_first; -+ for (i = 0; i < UBIFS_LPT_FANOUT; i++) { -+ if (lnum >= c->leb_cnt) -+ return; -+ pnode->lprops[i].lnum = lnum++; -+ } -+} -+ -+/** -+ * ubifs_read_nnode - read a nnode from flash and link it to the tree in memory. -+ * @c: UBIFS file-system description object -+ * @parent: parent nnode (or NULL for the root) -+ * @iip: index in parent -+ * -+ * This function returns %0 on success and a negative error code on failure. 
-+ */ -+int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) -+{ -+ struct ubifs_nbranch *branch = NULL; -+ struct ubifs_nnode *nnode = NULL; -+ void *buf = c->lpt_nod_buf; -+ int err, lnum, offs; -+ -+ if (parent) { -+ branch = &parent->nbranch[iip]; -+ lnum = branch->lnum; -+ offs = branch->offs; -+ } else { -+ lnum = c->lpt_lnum; -+ offs = c->lpt_offs; -+ } -+ nnode = kzalloc(sizeof(struct ubifs_nnode), GFP_NOFS); -+ if (!nnode) { -+ err = -ENOMEM; -+ goto out; -+ } -+ if (lnum == 0) { -+ /* -+ * This nnode was not written which just means that the LEB -+ * properties in the subtree below it describe empty LEBs. We -+ * make the nnode as though we had read it, which in fact means -+ * doing almost nothing. -+ */ -+ if (c->big_lpt) -+ nnode->num = calc_nnode_num_from_parent(c, parent, iip); -+ } else { -+ err = ubi_read(c->ubi, lnum, buf, offs, c->nnode_sz); -+ if (err) -+ goto out; -+ err = ubifs_unpack_nnode(c, buf, nnode); -+ if (err) -+ goto out; -+ } -+ err = validate_nnode(c, nnode, parent, iip); -+ if (err) -+ goto out; -+ if (!c->big_lpt) -+ nnode->num = calc_nnode_num_from_parent(c, parent, iip); -+ if (parent) { -+ branch->nnode = nnode; -+ nnode->level = parent->level - 1; -+ } else { -+ c->nroot = nnode; -+ nnode->level = c->lpt_hght; -+ } -+ nnode->parent = parent; -+ nnode->iip = iip; -+ return 0; -+ -+out: -+ ubifs_err("error %d reading nnode at %d:%d", err, lnum, offs); -+ kfree(nnode); -+ return err; -+} -+ -+/** -+ * read_pnode - read a pnode from flash and link it to the tree in memory. -+ * @c: UBIFS file-system description object -+ * @parent: parent nnode -+ * @iip: index in parent -+ * -+ * This function returns %0 on success and a negative error code on failure. 
-+ */ -+static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) -+{ -+ struct ubifs_nbranch *branch; -+ struct ubifs_pnode *pnode = NULL; -+ void *buf = c->lpt_nod_buf; -+ int err, lnum, offs; -+ -+ branch = &parent->nbranch[iip]; -+ lnum = branch->lnum; -+ offs = branch->offs; -+ pnode = kzalloc(sizeof(struct ubifs_pnode), GFP_NOFS); -+ if (!pnode) { -+ err = -ENOMEM; -+ goto out; -+ } -+ if (lnum == 0) { -+ /* -+ * This pnode was not written which just means that the LEB -+ * properties in it describe empty LEBs. We make the pnode as -+ * though we had read it. -+ */ -+ int i; -+ -+ if (c->big_lpt) -+ pnode->num = calc_pnode_num_from_parent(c, parent, iip); -+ for (i = 0; i < UBIFS_LPT_FANOUT; i++) { -+ struct ubifs_lprops * const lprops = &pnode->lprops[i]; -+ -+ lprops->free = c->leb_size; -+ lprops->flags = ubifs_categorize_lprops(c, lprops); -+ } -+ } else { -+ err = ubi_read(c->ubi, lnum, buf, offs, c->pnode_sz); -+ if (err) -+ goto out; -+ err = unpack_pnode(c, buf, pnode); -+ if (err) -+ goto out; -+ } -+ err = validate_pnode(c, pnode, parent, iip); -+ if (err) -+ goto out; -+ if (!c->big_lpt) -+ pnode->num = calc_pnode_num_from_parent(c, parent, iip); -+ branch->pnode = pnode; -+ pnode->parent = parent; -+ pnode->iip = iip; -+ set_pnode_lnum(c, pnode); -+ c->pnodes_have += 1; -+ return 0; -+ -+out: -+ ubifs_err("error %d reading pnode at %d:%d", err, lnum, offs); -+ dbg_dump_pnode(c, pnode, parent, iip); -+ dbg_msg("calc num: %d", calc_pnode_num_from_parent(c, parent, iip)); -+ kfree(pnode); -+ return err; -+} -+ -+/** -+ * read_ltab - read LPT's own lprops table. -+ * @c: UBIFS file-system description object -+ * -+ * This function returns %0 on success and a negative error code on failure. 
-+ */ -+static int read_ltab(struct ubifs_info *c) -+{ -+ int err; -+ void *buf; -+ -+ buf = vmalloc(c->ltab_sz); -+ if (!buf) -+ return -ENOMEM; -+ err = ubi_read(c->ubi, c->ltab_lnum, buf, c->ltab_offs, c->ltab_sz); -+ if (err) -+ goto out; -+ err = unpack_ltab(c, buf); -+out: -+ vfree(buf); -+ return err; -+} -+ -+/** -+ * read_lsave - read LPT's save table. -+ * @c: UBIFS file-system description object -+ * -+ * This function returns %0 on success and a negative error code on failure. -+ */ -+static int read_lsave(struct ubifs_info *c) -+{ -+ int err, i; -+ void *buf; -+ -+ buf = vmalloc(c->lsave_sz); -+ if (!buf) -+ return -ENOMEM; -+ err = ubi_read(c->ubi, c->lsave_lnum, buf, c->lsave_offs, c->lsave_sz); -+ if (err) -+ goto out; -+ err = unpack_lsave(c, buf); -+ if (err) -+ goto out; -+ for (i = 0; i < c->lsave_cnt; i++) { -+ int lnum = c->lsave[i]; -+ -+ /* -+ * Due to automatic resizing, the values in the lsave table -+ * could be beyond the volume size - just ignore them. -+ */ -+ if (lnum >= c->leb_cnt) -+ continue; -+ ubifs_lpt_lookup(c, lnum); -+ } -+out: -+ vfree(buf); -+ return err; -+} -+ -+/** -+ * ubifs_get_nnode - get a nnode. -+ * @c: UBIFS file-system description object -+ * @parent: parent nnode (or NULL for the root) -+ * @iip: index in parent -+ * -+ * This function returns a pointer to the nnode on success or a negative error -+ * code on failure. -+ */ -+struct ubifs_nnode *ubifs_get_nnode(struct ubifs_info *c, -+ struct ubifs_nnode *parent, int iip) -+{ -+ struct ubifs_nbranch *branch; -+ struct ubifs_nnode *nnode; -+ int err; -+ -+ branch = &parent->nbranch[iip]; -+ nnode = branch->nnode; -+ if (nnode) -+ return nnode; -+ err = ubifs_read_nnode(c, parent, iip); -+ if (err) -+ return ERR_PTR(err); -+ return branch->nnode; -+} -+ -+/** -+ * ubifs_get_pnode - get a pnode. 
-+ * @c: UBIFS file-system description object -+ * @parent: parent nnode -+ * @iip: index in parent -+ * -+ * This function returns a pointer to the pnode on success or a negative error -+ * code on failure. -+ */ -+struct ubifs_pnode *ubifs_get_pnode(struct ubifs_info *c, -+ struct ubifs_nnode *parent, int iip) -+{ -+ struct ubifs_nbranch *branch; -+ struct ubifs_pnode *pnode; -+ int err; -+ -+ branch = &parent->nbranch[iip]; -+ pnode = branch->pnode; -+ if (pnode) -+ return pnode; -+ err = read_pnode(c, parent, iip); -+ if (err) -+ return ERR_PTR(err); -+ update_cats(c, branch->pnode); -+ return branch->pnode; -+} -+ -+/** -+ * ubifs_lpt_lookup - lookup LEB properties in the LPT. -+ * @c: UBIFS file-system description object -+ * @lnum: LEB number to lookup -+ * -+ * This function returns a pointer to the LEB properties on success or a -+ * negative error code on failure. -+ */ -+struct ubifs_lprops *ubifs_lpt_lookup(struct ubifs_info *c, int lnum) -+{ -+ int err, i, h, iip, shft; -+ struct ubifs_nnode *nnode; -+ struct ubifs_pnode *pnode; -+ -+ if (!c->nroot) { -+ err = ubifs_read_nnode(c, NULL, 0); -+ if (err) -+ return ERR_PTR(err); -+ } -+ nnode = c->nroot; -+ i = lnum - c->main_first; -+ shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT; -+ for (h = 1; h < c->lpt_hght; h++) { -+ iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); -+ shft -= UBIFS_LPT_FANOUT_SHIFT; -+ nnode = ubifs_get_nnode(c, nnode, iip); -+ if (IS_ERR(nnode)) -+ return ERR_PTR(PTR_ERR(nnode)); -+ } -+ iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); -+ shft -= UBIFS_LPT_FANOUT_SHIFT; -+ pnode = ubifs_get_pnode(c, nnode, iip); -+ if (IS_ERR(pnode)) -+ return ERR_PTR(PTR_ERR(pnode)); -+ iip = (i & (UBIFS_LPT_FANOUT - 1)); -+ dbg_lp("LEB %d, free %d, dirty %d, flags %d", lnum, -+ pnode->lprops[iip].free, pnode->lprops[iip].dirty, -+ pnode->lprops[iip].flags); -+ return &pnode->lprops[iip]; -+} -+ -+/** -+ * dirty_cow_nnode - ensure a nnode is not being committed. 
-+ * @c: UBIFS file-system description object -+ * @nnode: nnode to check -+ * -+ * Returns dirtied nnode on success or negative error code on failure. -+ */ -+static struct ubifs_nnode *dirty_cow_nnode(struct ubifs_info *c, -+ struct ubifs_nnode *nnode) -+{ -+ struct ubifs_nnode *n; -+ int i; -+ -+ if (!test_bit(COW_CNODE, &nnode->flags)) { -+ /* nnode is not being committed */ -+ if (!test_and_set_bit(DIRTY_CNODE, &nnode->flags)) { -+ c->dirty_nn_cnt += 1; -+ ubifs_add_nnode_dirt(c, nnode); -+ } -+ return nnode; -+ } -+ -+ /* nnode is being committed, so copy it */ -+ n = kmalloc(sizeof(struct ubifs_nnode), GFP_NOFS); -+ if (unlikely(!n)) -+ return ERR_PTR(-ENOMEM); -+ -+ memcpy(n, nnode, sizeof(struct ubifs_nnode)); -+ n->cnext = NULL; -+ __set_bit(DIRTY_CNODE, &n->flags); -+ __clear_bit(COW_CNODE, &n->flags); -+ -+ /* The children now have new parent */ -+ for (i = 0; i < UBIFS_LPT_FANOUT; i++) { -+ struct ubifs_nbranch *branch = &n->nbranch[i]; -+ -+ if (branch->cnode) -+ branch->cnode->parent = n; -+ } -+ -+ ubifs_assert(!test_bit(OBSOLETE_CNODE, &nnode->flags)); -+ __set_bit(OBSOLETE_CNODE, &nnode->flags); -+ -+ c->dirty_nn_cnt += 1; -+ ubifs_add_nnode_dirt(c, nnode); -+ if (nnode->parent) -+ nnode->parent->nbranch[n->iip].nnode = n; -+ else -+ c->nroot = n; -+ return n; -+} -+ -+/** -+ * dirty_cow_pnode - ensure a pnode is not being committed. -+ * @c: UBIFS file-system description object -+ * @pnode: pnode to check -+ * -+ * Returns dirtied pnode on success or negative error code on failure. 
-+ */ -+static struct ubifs_pnode *dirty_cow_pnode(struct ubifs_info *c, -+ struct ubifs_pnode *pnode) -+{ -+ struct ubifs_pnode *p; -+ -+ if (!test_bit(COW_CNODE, &pnode->flags)) { -+ /* pnode is not being committed */ -+ if (!test_and_set_bit(DIRTY_CNODE, &pnode->flags)) { -+ c->dirty_pn_cnt += 1; -+ add_pnode_dirt(c, pnode); -+ } -+ return pnode; -+ } -+ -+ /* pnode is being committed, so copy it */ -+ p = kmalloc(sizeof(struct ubifs_pnode), GFP_NOFS); -+ if (unlikely(!p)) -+ return ERR_PTR(-ENOMEM); -+ -+ memcpy(p, pnode, sizeof(struct ubifs_pnode)); -+ p->cnext = NULL; -+ __set_bit(DIRTY_CNODE, &p->flags); -+ __clear_bit(COW_CNODE, &p->flags); -+ replace_cats(c, pnode, p); -+ -+ ubifs_assert(!test_bit(OBSOLETE_CNODE, &pnode->flags)); -+ __set_bit(OBSOLETE_CNODE, &pnode->flags); -+ -+ c->dirty_pn_cnt += 1; -+ add_pnode_dirt(c, pnode); -+ pnode->parent->nbranch[p->iip].pnode = p; -+ return p; -+} -+ -+/** -+ * ubifs_lpt_lookup_dirty - lookup LEB properties in the LPT. -+ * @c: UBIFS file-system description object -+ * @lnum: LEB number to lookup -+ * -+ * This function returns a pointer to the LEB properties on success or a -+ * negative error code on failure. 
-+ */ -+struct ubifs_lprops *ubifs_lpt_lookup_dirty(struct ubifs_info *c, int lnum) -+{ -+ int err, i, h, iip, shft; -+ struct ubifs_nnode *nnode; -+ struct ubifs_pnode *pnode; -+ -+ if (!c->nroot) { -+ err = ubifs_read_nnode(c, NULL, 0); -+ if (err) -+ return ERR_PTR(err); -+ } -+ nnode = c->nroot; -+ nnode = dirty_cow_nnode(c, nnode); -+ if (IS_ERR(nnode)) -+ return ERR_PTR(PTR_ERR(nnode)); -+ i = lnum - c->main_first; -+ shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT; -+ for (h = 1; h < c->lpt_hght; h++) { -+ iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); -+ shft -= UBIFS_LPT_FANOUT_SHIFT; -+ nnode = ubifs_get_nnode(c, nnode, iip); -+ if (IS_ERR(nnode)) -+ return ERR_PTR(PTR_ERR(nnode)); -+ nnode = dirty_cow_nnode(c, nnode); -+ if (IS_ERR(nnode)) -+ return ERR_PTR(PTR_ERR(nnode)); -+ } -+ iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); -+ shft -= UBIFS_LPT_FANOUT_SHIFT; -+ pnode = ubifs_get_pnode(c, nnode, iip); -+ if (IS_ERR(pnode)) -+ return ERR_PTR(PTR_ERR(pnode)); -+ pnode = dirty_cow_pnode(c, pnode); -+ if (IS_ERR(pnode)) -+ return ERR_PTR(PTR_ERR(pnode)); -+ iip = (i & (UBIFS_LPT_FANOUT - 1)); -+ dbg_lp("LEB %d, free %d, dirty %d, flags %d", lnum, -+ pnode->lprops[iip].free, pnode->lprops[iip].dirty, -+ pnode->lprops[iip].flags); -+ ubifs_assert(test_bit(DIRTY_CNODE, &pnode->flags)); -+ return &pnode->lprops[iip]; -+} -+ -+/** -+ * lpt_init_rd - initialize the LPT for reading. -+ * @c: UBIFS file-system description object -+ * -+ * This function returns %0 on success and a negative error code on failure. 
-+ */ -+static int lpt_init_rd(struct ubifs_info *c) -+{ -+ int err, i; -+ -+ c->ltab = vmalloc(sizeof(struct ubifs_lpt_lprops) * c->lpt_lebs); -+ if (!c->ltab) -+ return -ENOMEM; -+ -+ i = max_t(int, c->nnode_sz, c->pnode_sz); -+ c->lpt_nod_buf = kmalloc(i, GFP_KERNEL); -+ if (!c->lpt_nod_buf) -+ return -ENOMEM; -+ -+ for (i = 0; i < LPROPS_HEAP_CNT; i++) { -+ c->lpt_heap[i].arr = kmalloc(sizeof(void *) * LPT_HEAP_SZ, -+ GFP_KERNEL); -+ if (!c->lpt_heap[i].arr) -+ return -ENOMEM; -+ c->lpt_heap[i].cnt = 0; -+ c->lpt_heap[i].max_cnt = LPT_HEAP_SZ; -+ } -+ -+ c->dirty_idx.arr = kmalloc(sizeof(void *) * LPT_HEAP_SZ, GFP_KERNEL); -+ if (!c->dirty_idx.arr) -+ return -ENOMEM; -+ c->dirty_idx.cnt = 0; -+ c->dirty_idx.max_cnt = LPT_HEAP_SZ; -+ -+ err = read_ltab(c); -+ if (err) -+ return err; -+ -+ dbg_lp("space_bits %d", c->space_bits); -+ dbg_lp("lpt_lnum_bits %d", c->lpt_lnum_bits); -+ dbg_lp("lpt_offs_bits %d", c->lpt_offs_bits); -+ dbg_lp("lpt_spc_bits %d", c->lpt_spc_bits); -+ dbg_lp("pcnt_bits %d", c->pcnt_bits); -+ dbg_lp("lnum_bits %d", c->lnum_bits); -+ dbg_lp("pnode_sz %d", c->pnode_sz); -+ dbg_lp("nnode_sz %d", c->nnode_sz); -+ dbg_lp("ltab_sz %d", c->ltab_sz); -+ dbg_lp("lsave_sz %d", c->lsave_sz); -+ dbg_lp("lsave_cnt %d", c->lsave_cnt); -+ dbg_lp("lpt_hght %d", c->lpt_hght); -+ dbg_lp("big_lpt %d", c->big_lpt); -+ dbg_lp("LPT root is at %d:%d", c->lpt_lnum, c->lpt_offs); -+ dbg_lp("LPT head is at %d:%d", c->nhead_lnum, c->nhead_offs); -+ dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs); -+ if (c->big_lpt) -+ dbg_lp("LPT lsave is at %d:%d", c->lsave_lnum, c->lsave_offs); -+ -+ return 0; -+} -+ -+/** -+ * lpt_init_wr - initialize the LPT for writing. -+ * @c: UBIFS file-system description object -+ * -+ * 'lpt_init_rd()' must have been called already. -+ * -+ * This function returns %0 on success and a negative error code on failure. 
-+ */ -+static int lpt_init_wr(struct ubifs_info *c) -+{ -+ int err, i; -+ -+ c->ltab_cmt = vmalloc(sizeof(struct ubifs_lpt_lprops) * c->lpt_lebs); -+ if (!c->ltab_cmt) -+ return -ENOMEM; -+ -+ c->lpt_buf = vmalloc(c->leb_size); -+ if (!c->lpt_buf) -+ return -ENOMEM; -+ -+ if (c->big_lpt) { -+ c->lsave = kmalloc(sizeof(int) * c->lsave_cnt, GFP_NOFS); -+ if (!c->lsave) -+ return -ENOMEM; -+ err = read_lsave(c); -+ if (err) -+ return err; -+ } -+ -+ for (i = 0; i < c->lpt_lebs; i++) -+ if (c->ltab[i].free == c->leb_size) { -+ err = ubifs_leb_unmap(c, i + c->lpt_first); -+ if (err) -+ return err; -+ } -+ -+ return 0; -+} -+ -+/** -+ * ubifs_lpt_init - initialize the LPT. -+ * @c: UBIFS file-system description object -+ * @rd: whether to initialize lpt for reading -+ * @wr: whether to initialize lpt for writing -+ * -+ * For mounting 'rw', @rd and @wr are both true. For mounting 'ro', @rd is true -+ * and @wr is false. For mounting from 'ro' to 'rw', @rd is false and @wr is -+ * true. -+ * -+ * This function returns %0 on success and a negative error code on failure. -+ */ -+int ubifs_lpt_init(struct ubifs_info *c, int rd, int wr) -+{ -+ int err; -+ -+ if (rd) { -+ err = lpt_init_rd(c); -+ if (err) -+ return err; -+ } -+ -+ if (wr) { -+ err = lpt_init_wr(c); -+ if (err) -+ return err; -+ } -+ -+ return 0; -+} -+ -+/** -+ * struct lpt_scan_node - somewhere to put nodes while we scan LPT. 
-+ * @nnode: where to keep a nnode -+ * @pnode: where to keep a pnode -+ * @cnode: where to keep a cnode -+ * @in_tree: is the node in the tree in memory -+ * @ptr.nnode: pointer to the nnode (if it is an nnode) which may be here or in -+ * the tree -+ * @ptr.pnode: ditto for pnode -+ * @ptr.cnode: ditto for cnode -+ */ -+struct lpt_scan_node { -+ union { -+ struct ubifs_nnode nnode; -+ struct ubifs_pnode pnode; -+ struct ubifs_cnode cnode; -+ }; -+ int in_tree; -+ union { -+ struct ubifs_nnode *nnode; -+ struct ubifs_pnode *pnode; -+ struct ubifs_cnode *cnode; -+ } ptr; -+}; -+ -+/** -+ * scan_get_nnode - for the scan, get a nnode from either the tree or flash. -+ * @c: the UBIFS file-system description object -+ * @path: where to put the nnode -+ * @parent: parent of the nnode -+ * @iip: index in parent of the nnode -+ * -+ * This function returns a pointer to the nnode on success or a negative error -+ * code on failure. -+ */ -+static struct ubifs_nnode *scan_get_nnode(struct ubifs_info *c, -+ struct lpt_scan_node *path, -+ struct ubifs_nnode *parent, int iip) -+{ -+ struct ubifs_nbranch *branch; -+ struct ubifs_nnode *nnode; -+ void *buf = c->lpt_nod_buf; -+ int err; -+ -+ branch = &parent->nbranch[iip]; -+ nnode = branch->nnode; -+ if (nnode) { -+ path->in_tree = 1; -+ path->ptr.nnode = nnode; -+ return nnode; -+ } -+ nnode = &path->nnode; -+ path->in_tree = 0; -+ path->ptr.nnode = nnode; -+ memset(nnode, 0, sizeof(struct ubifs_nnode)); -+ if (branch->lnum == 0) { -+ /* -+ * This nnode was not written which just means that the LEB -+ * properties in the subtree below it describe empty LEBs. We -+ * make the nnode as though we had read it, which in fact means -+ * doing almost nothing. 
-+ */ -+ if (c->big_lpt) -+ nnode->num = calc_nnode_num_from_parent(c, parent, iip); -+ } else { -+ err = ubi_read(c->ubi, branch->lnum, buf, branch->offs, -+ c->nnode_sz); -+ if (err) -+ return ERR_PTR(err); -+ err = ubifs_unpack_nnode(c, buf, nnode); -+ if (err) -+ return ERR_PTR(err); -+ } -+ err = validate_nnode(c, nnode, parent, iip); -+ if (err) -+ return ERR_PTR(err); -+ if (!c->big_lpt) -+ nnode->num = calc_nnode_num_from_parent(c, parent, iip); -+ nnode->level = parent->level - 1; -+ nnode->parent = parent; -+ nnode->iip = iip; -+ return nnode; -+} -+ -+/** -+ * scan_get_pnode - for the scan, get a pnode from either the tree or flash. -+ * @c: the UBIFS file-system description object -+ * @path: where to put the pnode -+ * @parent: parent of the pnode -+ * @iip: index in parent of the pnode -+ * -+ * This function returns a pointer to the pnode on success or a negative error -+ * code on failure. -+ */ -+static struct ubifs_pnode *scan_get_pnode(struct ubifs_info *c, -+ struct lpt_scan_node *path, -+ struct ubifs_nnode *parent, int iip) -+{ -+ struct ubifs_nbranch *branch; -+ struct ubifs_pnode *pnode; -+ void *buf = c->lpt_nod_buf; -+ int err; -+ -+ branch = &parent->nbranch[iip]; -+ pnode = branch->pnode; -+ if (pnode) { -+ path->in_tree = 1; -+ path->ptr.pnode = pnode; -+ return pnode; -+ } -+ pnode = &path->pnode; -+ path->in_tree = 0; -+ path->ptr.pnode = pnode; -+ memset(pnode, 0, sizeof(struct ubifs_pnode)); -+ if (branch->lnum == 0) { -+ /* -+ * This pnode was not written which just means that the LEB -+ * properties in it describe empty LEBs. We make the pnode as -+ * though we had read it. 
-+ */ -+ int i; -+ -+ if (c->big_lpt) -+ pnode->num = calc_pnode_num_from_parent(c, parent, iip); -+ for (i = 0; i < UBIFS_LPT_FANOUT; i++) { -+ struct ubifs_lprops * const lprops = &pnode->lprops[i]; -+ -+ lprops->free = c->leb_size; -+ lprops->flags = ubifs_categorize_lprops(c, lprops); -+ } -+ } else { -+ ubifs_assert(branch->lnum >= c->lpt_first && -+ branch->lnum <= c->lpt_last); -+ ubifs_assert(branch->offs >= 0 && branch->offs < c->leb_size); -+ err = ubi_read(c->ubi, branch->lnum, buf, branch->offs, -+ c->pnode_sz); -+ if (err) -+ return ERR_PTR(err); -+ err = unpack_pnode(c, buf, pnode); -+ if (err) -+ return ERR_PTR(err); -+ } -+ err = validate_pnode(c, pnode, parent, iip); -+ if (err) -+ return ERR_PTR(err); -+ if (!c->big_lpt) -+ pnode->num = calc_pnode_num_from_parent(c, parent, iip); -+ pnode->parent = parent; -+ pnode->iip = iip; -+ set_pnode_lnum(c, pnode); -+ return pnode; -+} -+ -+/** -+ * ubifs_lpt_scan_nolock - scan the LPT. -+ * @c: the UBIFS file-system description object -+ * @start_lnum: LEB number from which to start scanning -+ * @end_lnum: LEB number at which to stop scanning -+ * @scan_cb: callback function called for each lprops -+ * @data: data to be passed to the callback function -+ * -+ * This function returns %0 on success and a negative error code on failure. 
-+ */ -+int ubifs_lpt_scan_nolock(struct ubifs_info *c, int start_lnum, int end_lnum, -+ ubifs_lpt_scan_callback scan_cb, void *data) -+{ -+ int err = 0, i, h, iip, shft; -+ struct ubifs_nnode *nnode; -+ struct ubifs_pnode *pnode; -+ struct lpt_scan_node *path; -+ -+ if (start_lnum == -1) { -+ start_lnum = end_lnum + 1; -+ if (start_lnum >= c->leb_cnt) -+ start_lnum = c->main_first; -+ } -+ -+ ubifs_assert(start_lnum >= c->main_first && start_lnum < c->leb_cnt); -+ ubifs_assert(end_lnum >= c->main_first && end_lnum < c->leb_cnt); -+ -+ if (!c->nroot) { -+ err = ubifs_read_nnode(c, NULL, 0); -+ if (err) -+ return err; -+ } -+ -+ path = kmalloc(sizeof(struct lpt_scan_node) * (c->lpt_hght + 1), -+ GFP_NOFS); -+ if (!path) -+ return -ENOMEM; -+ -+ path[0].ptr.nnode = c->nroot; -+ path[0].in_tree = 1; -+again: -+ /* Descend to the pnode containing start_lnum */ -+ nnode = c->nroot; -+ i = start_lnum - c->main_first; -+ shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT; -+ for (h = 1; h < c->lpt_hght; h++) { -+ iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); -+ shft -= UBIFS_LPT_FANOUT_SHIFT; -+ nnode = scan_get_nnode(c, path + h, nnode, iip); -+ if (IS_ERR(nnode)) { -+ err = PTR_ERR(nnode); -+ goto out; -+ } -+ } -+ iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); -+ shft -= UBIFS_LPT_FANOUT_SHIFT; -+ pnode = scan_get_pnode(c, path + h, nnode, iip); -+ if (IS_ERR(pnode)) { -+ err = PTR_ERR(pnode); -+ goto out; -+ } -+ iip = (i & (UBIFS_LPT_FANOUT - 1)); -+ -+ /* Loop for each lprops */ -+ while (1) { -+ struct ubifs_lprops *lprops = &pnode->lprops[iip]; -+ int ret, lnum = lprops->lnum; -+ -+ ret = scan_cb(c, lprops, path[h].in_tree, data); -+ if (ret < 0) { -+ err = ret; -+ goto out; -+ } -+ if (ret & LPT_SCAN_ADD) { -+ /* Add all the nodes in path to the tree in memory */ -+ for (h = 1; h < c->lpt_hght; h++) { -+ const size_t sz = sizeof(struct ubifs_nnode); -+ struct ubifs_nnode *parent; -+ -+ if (path[h].in_tree) -+ continue; -+ nnode = kmalloc(sz, GFP_NOFS); -+ if (!nnode) { 
-+ err = -ENOMEM; -+ goto out; -+ } -+ memcpy(nnode, &path[h].nnode, sz); -+ parent = nnode->parent; -+ parent->nbranch[nnode->iip].nnode = nnode; -+ path[h].ptr.nnode = nnode; -+ path[h].in_tree = 1; -+ path[h + 1].cnode.parent = nnode; -+ } -+ if (path[h].in_tree) -+ ubifs_ensure_cat(c, lprops); -+ else { -+ const size_t sz = sizeof(struct ubifs_pnode); -+ struct ubifs_nnode *parent; -+ -+ pnode = kmalloc(sz, GFP_NOFS); -+ if (!pnode) { -+ err = -ENOMEM; -+ goto out; -+ } -+ memcpy(pnode, &path[h].pnode, sz); -+ parent = pnode->parent; -+ parent->nbranch[pnode->iip].pnode = pnode; -+ path[h].ptr.pnode = pnode; -+ path[h].in_tree = 1; -+ update_cats(c, pnode); -+ c->pnodes_have += 1; -+ } -+ err = dbg_check_lpt_nodes(c, (struct ubifs_cnode *) -+ c->nroot, 0, 0); -+ if (err) -+ goto out; -+ err = dbg_check_cats(c); -+ if (err) -+ goto out; -+ } -+ if (ret & LPT_SCAN_STOP) { -+ err = 0; -+ break; -+ } -+ /* Get the next lprops */ -+ if (lnum == end_lnum) { -+ /* -+ * We got to the end without finding what we were -+ * looking for -+ */ -+ err = -ENOSPC; -+ goto out; -+ } -+ if (lnum + 1 >= c->leb_cnt) { -+ /* Wrap-around to the beginning */ -+ start_lnum = c->main_first; -+ goto again; -+ } -+ if (iip + 1 < UBIFS_LPT_FANOUT) { -+ /* Next lprops is in the same pnode */ -+ iip += 1; -+ continue; -+ } -+ /* We need to get the next pnode. 
Go up until we can go right */ -+ iip = pnode->iip; -+ while (1) { -+ h -= 1; -+ ubifs_assert(h >= 0); -+ nnode = path[h].ptr.nnode; -+ if (iip + 1 < UBIFS_LPT_FANOUT) -+ break; -+ iip = nnode->iip; -+ } -+ /* Go right */ -+ iip += 1; -+ /* Descend to the pnode */ -+ h += 1; -+ for (; h < c->lpt_hght; h++) { -+ nnode = scan_get_nnode(c, path + h, nnode, iip); -+ if (IS_ERR(nnode)) { -+ err = PTR_ERR(nnode); -+ goto out; -+ } -+ iip = 0; -+ } -+ pnode = scan_get_pnode(c, path + h, nnode, iip); -+ if (IS_ERR(pnode)) { -+ err = PTR_ERR(pnode); -+ goto out; -+ } -+ iip = 0; -+ } -+out: -+ kfree(path); -+ return err; -+} -+ -+#ifdef CONFIG_UBIFS_FS_DEBUG -+ -+/** -+ * dbg_chk_pnode - check a pnode. -+ * @c: the UBIFS file-system description object -+ * @pnode: pnode to check -+ * @col: pnode column -+ * -+ * This function returns %0 on success and a negative error code on failure. -+ */ -+static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, -+ int col) -+{ -+ int i; -+ -+ if (pnode->num != col) { -+ dbg_err("pnode num %d expected %d parent num %d iip %d", -+ pnode->num, col, pnode->parent->num, pnode->iip); -+ return -EINVAL; -+ } -+ for (i = 0; i < UBIFS_LPT_FANOUT; i++) { -+ struct ubifs_lprops *lp, *lprops = &pnode->lprops[i]; -+ int lnum = (pnode->num << UBIFS_LPT_FANOUT_SHIFT) + i + -+ c->main_first; -+ int found, cat = lprops->flags & LPROPS_CAT_MASK; -+ struct ubifs_lpt_heap *heap; -+ struct list_head *list = NULL; -+ -+ if (lnum >= c->leb_cnt) -+ continue; -+ if (lprops->lnum != lnum) { -+ dbg_err("bad LEB number %d expected %d", -+ lprops->lnum, lnum); -+ return -EINVAL; -+ } -+ if (lprops->flags & LPROPS_TAKEN) { -+ if (cat != LPROPS_UNCAT) { -+ dbg_err("LEB %d taken but not uncat %d", -+ lprops->lnum, cat); -+ return -EINVAL; -+ } -+ continue; -+ } -+ if (lprops->flags & LPROPS_INDEX) { -+ switch (cat) { -+ case LPROPS_UNCAT: -+ case LPROPS_DIRTY_IDX: -+ case LPROPS_FRDI_IDX: -+ break; -+ default: -+ dbg_err("LEB %d index but cat %d", -+ 
lprops->lnum, cat); -+ return -EINVAL; -+ } -+ } else { -+ switch (cat) { -+ case LPROPS_UNCAT: -+ case LPROPS_DIRTY: -+ case LPROPS_FREE: -+ case LPROPS_EMPTY: -+ case LPROPS_FREEABLE: -+ break; -+ default: -+ dbg_err("LEB %d not index but cat %d", -+ lprops->lnum, cat); -+ return -EINVAL; -+ } -+ } -+ switch (cat) { -+ case LPROPS_UNCAT: -+ list = &c->uncat_list; -+ break; -+ case LPROPS_EMPTY: -+ list = &c->empty_list; -+ break; -+ case LPROPS_FREEABLE: -+ list = &c->freeable_list; -+ break; -+ case LPROPS_FRDI_IDX: -+ list = &c->frdi_idx_list; -+ break; -+ } -+ found = 0; -+ switch (cat) { -+ case LPROPS_DIRTY: -+ case LPROPS_DIRTY_IDX: -+ case LPROPS_FREE: -+ heap = &c->lpt_heap[cat - 1]; -+ if (lprops->hpos < heap->cnt && -+ heap->arr[lprops->hpos] == lprops) -+ found = 1; -+ break; -+ case LPROPS_UNCAT: -+ case LPROPS_EMPTY: -+ case LPROPS_FREEABLE: -+ case LPROPS_FRDI_IDX: -+ list_for_each_entry(lp, list, list) -+ if (lprops == lp) { -+ found = 1; -+ break; -+ } -+ break; -+ } -+ if (!found) { -+ dbg_err("LEB %d cat %d not found in cat heap/list", -+ lprops->lnum, cat); -+ return -EINVAL; -+ } -+ switch (cat) { -+ case LPROPS_EMPTY: -+ if (lprops->free != c->leb_size) { -+ dbg_err("LEB %d cat %d free %d dirty %d", -+ lprops->lnum, cat, lprops->free, -+ lprops->dirty); -+ return -EINVAL; -+ } -+ case LPROPS_FREEABLE: -+ case LPROPS_FRDI_IDX: -+ if (lprops->free + lprops->dirty != c->leb_size) { -+ dbg_err("LEB %d cat %d free %d dirty %d", -+ lprops->lnum, cat, lprops->free, -+ lprops->dirty); -+ return -EINVAL; -+ } -+ } -+ } -+ return 0; -+} -+ -+/** -+ * dbg_check_lpt_nodes - check nnodes and pnodes. -+ * @c: the UBIFS file-system description object -+ * @cnode: next cnode (nnode or pnode) to check -+ * @row: row of cnode (root is zero) -+ * @col: column of cnode (leftmost is zero) -+ * -+ * This function returns %0 on success and a negative error code on failure. 
-+ */ -+int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode, -+ int row, int col) -+{ -+ struct ubifs_nnode *nnode, *nn; -+ struct ubifs_cnode *cn; -+ int num, iip = 0, err; -+ -+ if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) -+ return 0; -+ -+ while (cnode) { -+ ubifs_assert(row >= 0); -+ nnode = cnode->parent; -+ if (cnode->level) { -+ /* cnode is a nnode */ -+ num = calc_nnode_num(row, col); -+ if (cnode->num != num) { -+ dbg_err("nnode num %d expected %d " -+ "parent num %d iip %d", cnode->num, num, -+ (nnode ? nnode->num : 0), cnode->iip); -+ return -EINVAL; -+ } -+ nn = (struct ubifs_nnode *)cnode; -+ while (iip < UBIFS_LPT_FANOUT) { -+ cn = nn->nbranch[iip].cnode; -+ if (cn) { -+ /* Go down */ -+ row += 1; -+ col <<= UBIFS_LPT_FANOUT_SHIFT; -+ col += iip; -+ iip = 0; -+ cnode = cn; -+ break; -+ } -+ /* Go right */ -+ iip += 1; -+ } -+ if (iip < UBIFS_LPT_FANOUT) -+ continue; -+ } else { -+ struct ubifs_pnode *pnode; -+ -+ /* cnode is a pnode */ -+ pnode = (struct ubifs_pnode *)cnode; -+ err = dbg_chk_pnode(c, pnode, col); -+ if (err) -+ return err; -+ } -+ /* Go up and to the right */ -+ row -= 1; -+ col >>= UBIFS_LPT_FANOUT_SHIFT; -+ iip = cnode->iip + 1; -+ cnode = (struct ubifs_cnode *)nnode; -+ } -+ return 0; -+} -+ -+#endif /* CONFIG_UBIFS_FS_DEBUG */ -diff -Nurd linux-2.6.24.orig/fs/ubifs/lpt_commit.c linux-2.6.24/fs/ubifs/lpt_commit.c ---- linux-2.6.24.orig/fs/ubifs/lpt_commit.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.24/fs/ubifs/lpt_commit.c 2009-04-17 09:49:28.000000000 +0200 -@@ -0,0 +1,1969 @@ -+/* -+ * This file is part of UBIFS. -+ * -+ * Copyright (C) 2006-2008 Nokia Corporation. -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 as published by -+ * the Free Software Foundation. 
-+ * -+ * This program is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -+ * more details. -+ * -+ * You should have received a copy of the GNU General Public License along with -+ * this program; if not, write to the Free Software Foundation, Inc., 51 -+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ * -+ * Authors: Adrian Hunter -+ * Artem Bityutskiy (Битюцкий Артём) -+ */ -+ -+/* -+ * This file implements commit-related functionality of the LEB properties -+ * subsystem. -+ */ -+ -+#include <linux/crc16.h> -+#include "ubifs.h" -+ -+/** -+ * first_dirty_cnode - find first dirty cnode. -+ * @c: UBIFS file-system description object -+ * @nnode: nnode at which to start -+ * -+ * This function returns the first dirty cnode or %NULL if there is not one. -+ */ -+static struct ubifs_cnode *first_dirty_cnode(struct ubifs_nnode *nnode) -+{ -+ ubifs_assert(nnode); -+ while (1) { -+ int i, cont = 0; -+ -+ for (i = 0; i < UBIFS_LPT_FANOUT; i++) { -+ struct ubifs_cnode *cnode; -+ -+ cnode = nnode->nbranch[i].cnode; -+ if (cnode && -+ test_bit(DIRTY_CNODE, &cnode->flags)) { -+ if (cnode->level == 0) -+ return cnode; -+ nnode = (struct ubifs_nnode *)cnode; -+ cont = 1; -+ break; -+ } -+ } -+ if (!cont) -+ return (struct ubifs_cnode *)nnode; -+ } -+} -+ -+/** -+ * next_dirty_cnode - find next dirty cnode. -+ * @cnode: cnode from which to begin searching -+ * -+ * This function returns the next dirty cnode or %NULL if there is not one. 
-+ */ -+static struct ubifs_cnode *next_dirty_cnode(struct ubifs_cnode *cnode) -+{ -+ struct ubifs_nnode *nnode; -+ int i; -+ -+ ubifs_assert(cnode); -+ nnode = cnode->parent; -+ if (!nnode) -+ return NULL; -+ for (i = cnode->iip + 1; i < UBIFS_LPT_FANOUT; i++) { -+ cnode = nnode->nbranch[i].cnode; -+ if (cnode && test_bit(DIRTY_CNODE, &cnode->flags)) { -+ if (cnode->level == 0) -+ return cnode; /* cnode is a pnode */ -+ /* cnode is a nnode */ -+ return first_dirty_cnode((struct ubifs_nnode *)cnode); -+ } -+ } -+ return (struct ubifs_cnode *)nnode; -+} -+ -+/** -+ * get_cnodes_to_commit - create list of dirty cnodes to commit. -+ * @c: UBIFS file-system description object -+ * -+ * This function returns the number of cnodes to commit. -+ */ -+static int get_cnodes_to_commit(struct ubifs_info *c) -+{ -+ struct ubifs_cnode *cnode, *cnext; -+ int cnt = 0; -+ -+ if (!c->nroot) -+ return 0; -+ -+ if (!test_bit(DIRTY_CNODE, &c->nroot->flags)) -+ return 0; -+ -+ c->lpt_cnext = first_dirty_cnode(c->nroot); -+ cnode = c->lpt_cnext; -+ if (!cnode) -+ return 0; -+ cnt += 1; -+ while (1) { -+ ubifs_assert(!test_bit(COW_ZNODE, &cnode->flags)); -+ __set_bit(COW_ZNODE, &cnode->flags); -+ cnext = next_dirty_cnode(cnode); -+ if (!cnext) { -+ cnode->cnext = c->lpt_cnext; -+ break; -+ } -+ cnode->cnext = cnext; -+ cnode = cnext; -+ cnt += 1; -+ } -+ dbg_cmt("committing %d cnodes", cnt); -+ dbg_lp("committing %d cnodes", cnt); -+ ubifs_assert(cnt == c->dirty_nn_cnt + c->dirty_pn_cnt); -+ return cnt; -+} -+ -+/** -+ * upd_ltab - update LPT LEB properties. 
-+ * @c: UBIFS file-system description object -+ * @lnum: LEB number -+ * @free: amount of free space -+ * @dirty: amount of dirty space to add -+ */ -+static void upd_ltab(struct ubifs_info *c, int lnum, int free, int dirty) -+{ -+ dbg_lp("LEB %d free %d dirty %d to %d +%d", -+ lnum, c->ltab[lnum - c->lpt_first].free, -+ c->ltab[lnum - c->lpt_first].dirty, free, dirty); -+ ubifs_assert(lnum >= c->lpt_first && lnum <= c->lpt_last); -+ c->ltab[lnum - c->lpt_first].free = free; -+ c->ltab[lnum - c->lpt_first].dirty += dirty; -+} -+ -+/** -+ * alloc_lpt_leb - allocate an LPT LEB that is empty. -+ * @c: UBIFS file-system description object -+ * @lnum: LEB number is passed and returned here -+ * -+ * This function finds the next empty LEB in the ltab starting from @lnum. If a -+ * an empty LEB is found it is returned in @lnum and the function returns %0. -+ * Otherwise the function returns -ENOSPC. Note however, that LPT is designed -+ * never to run out of space. -+ */ -+static int alloc_lpt_leb(struct ubifs_info *c, int *lnum) -+{ -+ int i, n; -+ -+ n = *lnum - c->lpt_first + 1; -+ for (i = n; i < c->lpt_lebs; i++) { -+ if (c->ltab[i].tgc || c->ltab[i].cmt) -+ continue; -+ if (c->ltab[i].free == c->leb_size) { -+ c->ltab[i].cmt = 1; -+ *lnum = i + c->lpt_first; -+ return 0; -+ } -+ } -+ -+ for (i = 0; i < n; i++) { -+ if (c->ltab[i].tgc || c->ltab[i].cmt) -+ continue; -+ if (c->ltab[i].free == c->leb_size) { -+ c->ltab[i].cmt = 1; -+ *lnum = i + c->lpt_first; -+ return 0; -+ } -+ } -+ return -ENOSPC; -+} -+ -+/** -+ * layout_cnodes - layout cnodes for commit. -+ * @c: UBIFS file-system description object -+ * -+ * This function returns %0 on success and a negative error code on failure. 
-+ */ -+static int layout_cnodes(struct ubifs_info *c) -+{ -+ int lnum, offs, len, alen, done_lsave, done_ltab, err; -+ struct ubifs_cnode *cnode; -+ -+ err = dbg_chk_lpt_sz(c, 0, 0); -+ if (err) -+ return err; -+ cnode = c->lpt_cnext; -+ if (!cnode) -+ return 0; -+ lnum = c->nhead_lnum; -+ offs = c->nhead_offs; -+ /* Try to place lsave and ltab nicely */ -+ done_lsave = !c->big_lpt; -+ done_ltab = 0; -+ if (!done_lsave && offs + c->lsave_sz <= c->leb_size) { -+ done_lsave = 1; -+ c->lsave_lnum = lnum; -+ c->lsave_offs = offs; -+ offs += c->lsave_sz; -+ dbg_chk_lpt_sz(c, 1, c->lsave_sz); -+ } -+ -+ if (offs + c->ltab_sz <= c->leb_size) { -+ done_ltab = 1; -+ c->ltab_lnum = lnum; -+ c->ltab_offs = offs; -+ offs += c->ltab_sz; -+ dbg_chk_lpt_sz(c, 1, c->ltab_sz); -+ } -+ -+ do { -+ if (cnode->level) { -+ len = c->nnode_sz; -+ c->dirty_nn_cnt -= 1; -+ } else { -+ len = c->pnode_sz; -+ c->dirty_pn_cnt -= 1; -+ } -+ while (offs + len > c->leb_size) { -+ alen = ALIGN(offs, c->min_io_size); -+ upd_ltab(c, lnum, c->leb_size - alen, alen - offs); -+ dbg_chk_lpt_sz(c, 2, alen - offs); -+ err = alloc_lpt_leb(c, &lnum); -+ if (err) -+ goto no_space; -+ offs = 0; -+ ubifs_assert(lnum >= c->lpt_first && -+ lnum <= c->lpt_last); -+ /* Try to place lsave and ltab nicely */ -+ if (!done_lsave) { -+ done_lsave = 1; -+ c->lsave_lnum = lnum; -+ c->lsave_offs = offs; -+ offs += c->lsave_sz; -+ dbg_chk_lpt_sz(c, 1, c->lsave_sz); -+ continue; -+ } -+ if (!done_ltab) { -+ done_ltab = 1; -+ c->ltab_lnum = lnum; -+ c->ltab_offs = offs; -+ offs += c->ltab_sz; -+ dbg_chk_lpt_sz(c, 1, c->ltab_sz); -+ continue; -+ } -+ break; -+ } -+ if (cnode->parent) { -+ cnode->parent->nbranch[cnode->iip].lnum = lnum; -+ cnode->parent->nbranch[cnode->iip].offs = offs; -+ } else { -+ c->lpt_lnum = lnum; -+ c->lpt_offs = offs; -+ } -+ offs += len; -+ dbg_chk_lpt_sz(c, 1, len); -+ cnode = cnode->cnext; -+ } while (cnode && cnode != c->lpt_cnext); -+ -+ /* Make sure to place LPT's save table */ -+ if 
(!done_lsave) { -+ if (offs + c->lsave_sz > c->leb_size) { -+ alen = ALIGN(offs, c->min_io_size); -+ upd_ltab(c, lnum, c->leb_size - alen, alen - offs); -+ dbg_chk_lpt_sz(c, 2, alen - offs); -+ err = alloc_lpt_leb(c, &lnum); -+ if (err) -+ goto no_space; -+ offs = 0; -+ ubifs_assert(lnum >= c->lpt_first && -+ lnum <= c->lpt_last); -+ } -+ done_lsave = 1; -+ c->lsave_lnum = lnum; -+ c->lsave_offs = offs; -+ offs += c->lsave_sz; -+ dbg_chk_lpt_sz(c, 1, c->lsave_sz); -+ } -+ -+ /* Make sure to place LPT's own lprops table */ -+ if (!done_ltab) { -+ if (offs + c->ltab_sz > c->leb_size) { -+ alen = ALIGN(offs, c->min_io_size); -+ upd_ltab(c, lnum, c->leb_size - alen, alen - offs); -+ dbg_chk_lpt_sz(c, 2, alen - offs); -+ err = alloc_lpt_leb(c, &lnum); -+ if (err) -+ goto no_space; -+ offs = 0; -+ ubifs_assert(lnum >= c->lpt_first && -+ lnum <= c->lpt_last); -+ } -+ done_ltab = 1; -+ c->ltab_lnum = lnum; -+ c->ltab_offs = offs; -+ offs += c->ltab_sz; -+ dbg_chk_lpt_sz(c, 1, c->ltab_sz); -+ } -+ -+ alen = ALIGN(offs, c->min_io_size); -+ upd_ltab(c, lnum, c->leb_size - alen, alen - offs); -+ dbg_chk_lpt_sz(c, 4, alen - offs); -+ err = dbg_chk_lpt_sz(c, 3, alen); -+ if (err) -+ return err; -+ return 0; -+ -+no_space: -+ ubifs_err("LPT out of space"); -+ dbg_err("LPT out of space at LEB %d:%d needing %d, done_ltab %d, " -+ "done_lsave %d", lnum, offs, len, done_ltab, done_lsave); -+ dbg_dump_lpt_info(c); -+ dbg_dump_lpt_lebs(c); -+ dump_stack(); -+ return err; -+} -+ -+/** -+ * realloc_lpt_leb - allocate an LPT LEB that is empty. -+ * @c: UBIFS file-system description object -+ * @lnum: LEB number is passed and returned here -+ * -+ * This function duplicates exactly the results of the function alloc_lpt_leb. -+ * It is used during end commit to reallocate the same LEB numbers that were -+ * allocated by alloc_lpt_leb during start commit. -+ * -+ * This function finds the next LEB that was allocated by the alloc_lpt_leb -+ * function starting from @lnum. 
If a LEB is found it is returned in @lnum and -+ * the function returns %0. Otherwise the function returns -ENOSPC. -+ * Note however, that LPT is designed never to run out of space. -+ */ -+static int realloc_lpt_leb(struct ubifs_info *c, int *lnum) -+{ -+ int i, n; -+ -+ n = *lnum - c->lpt_first + 1; -+ for (i = n; i < c->lpt_lebs; i++) -+ if (c->ltab[i].cmt) { -+ c->ltab[i].cmt = 0; -+ *lnum = i + c->lpt_first; -+ return 0; -+ } -+ -+ for (i = 0; i < n; i++) -+ if (c->ltab[i].cmt) { -+ c->ltab[i].cmt = 0; -+ *lnum = i + c->lpt_first; -+ return 0; -+ } -+ return -ENOSPC; -+} -+ -+/** -+ * write_cnodes - write cnodes for commit. -+ * @c: UBIFS file-system description object -+ * -+ * This function returns %0 on success and a negative error code on failure. -+ */ -+static int write_cnodes(struct ubifs_info *c) -+{ -+ int lnum, offs, len, from, err, wlen, alen, done_ltab, done_lsave; -+ struct ubifs_cnode *cnode; -+ void *buf = c->lpt_buf; -+ -+ cnode = c->lpt_cnext; -+ if (!cnode) -+ return 0; -+ lnum = c->nhead_lnum; -+ offs = c->nhead_offs; -+ from = offs; -+ /* Ensure empty LEB is unmapped */ -+ if (offs == 0) { -+ err = ubifs_leb_unmap(c, lnum); -+ if (err) -+ return err; -+ } -+ /* Try to place lsave and ltab nicely */ -+ done_lsave = !c->big_lpt; -+ done_ltab = 0; -+ if (!done_lsave && offs + c->lsave_sz <= c->leb_size) { -+ done_lsave = 1; -+ ubifs_pack_lsave(c, buf + offs, c->lsave); -+ offs += c->lsave_sz; -+ dbg_chk_lpt_sz(c, 1, c->lsave_sz); -+ } -+ -+ if (offs + c->ltab_sz <= c->leb_size) { -+ done_ltab = 1; -+ ubifs_pack_ltab(c, buf + offs, c->ltab_cmt); -+ offs += c->ltab_sz; -+ dbg_chk_lpt_sz(c, 1, c->ltab_sz); -+ } -+ -+ /* Loop for each cnode */ -+ do { -+ if (cnode->level) -+ len = c->nnode_sz; -+ else -+ len = c->pnode_sz; -+ while (offs + len > c->leb_size) { -+ wlen = offs - from; -+ if (wlen) { -+ alen = ALIGN(wlen, c->min_io_size); -+ memset(buf + offs, 0xff, alen - wlen); -+ err = ubifs_leb_write(c, lnum, buf + from, from, -+ alen, 
UBI_SHORTTERM); -+ if (err) -+ return err; -+ dbg_chk_lpt_sz(c, 4, alen - wlen); -+ } -+ dbg_chk_lpt_sz(c, 2, 0); -+ err = realloc_lpt_leb(c, &lnum); -+ if (err) -+ goto no_space; -+ offs = 0; -+ from = 0; -+ ubifs_assert(lnum >= c->lpt_first && -+ lnum <= c->lpt_last); -+ err = ubifs_leb_unmap(c, lnum); -+ if (err) -+ return err; -+ /* Try to place lsave and ltab nicely */ -+ if (!done_lsave) { -+ done_lsave = 1; -+ ubifs_pack_lsave(c, buf + offs, c->lsave); -+ offs += c->lsave_sz; -+ dbg_chk_lpt_sz(c, 1, c->lsave_sz); -+ continue; -+ } -+ if (!done_ltab) { -+ done_ltab = 1; -+ ubifs_pack_ltab(c, buf + offs, c->ltab_cmt); -+ offs += c->ltab_sz; -+ dbg_chk_lpt_sz(c, 1, c->ltab_sz); -+ continue; -+ } -+ break; -+ } -+ if (cnode->level) -+ ubifs_pack_nnode(c, buf + offs, -+ (struct ubifs_nnode *)cnode); -+ else -+ ubifs_pack_pnode(c, buf + offs, -+ (struct ubifs_pnode *)cnode); -+ /* -+ * The reason for the barriers is the same as in case of TNC. -+ * See comment in 'write_index()'. 'dirty_cow_nnode()' and -+ * 'dirty_cow_pnode()' are the functions for which this is -+ * important. 
-+ */ -+ clear_bit(DIRTY_CNODE, &cnode->flags); -+ smp_mb__before_clear_bit(); -+ clear_bit(COW_ZNODE, &cnode->flags); -+ smp_mb__after_clear_bit(); -+ offs += len; -+ dbg_chk_lpt_sz(c, 1, len); -+ cnode = cnode->cnext; -+ } while (cnode && cnode != c->lpt_cnext); -+ -+ /* Make sure to place LPT's save table */ -+ if (!done_lsave) { -+ if (offs + c->lsave_sz > c->leb_size) { -+ wlen = offs - from; -+ alen = ALIGN(wlen, c->min_io_size); -+ memset(buf + offs, 0xff, alen - wlen); -+ err = ubifs_leb_write(c, lnum, buf + from, from, alen, -+ UBI_SHORTTERM); -+ if (err) -+ return err; -+ dbg_chk_lpt_sz(c, 2, alen - wlen); -+ err = realloc_lpt_leb(c, &lnum); -+ if (err) -+ goto no_space; -+ offs = 0; -+ ubifs_assert(lnum >= c->lpt_first && -+ lnum <= c->lpt_last); -+ err = ubifs_leb_unmap(c, lnum); -+ if (err) -+ return err; -+ } -+ done_lsave = 1; -+ ubifs_pack_lsave(c, buf + offs, c->lsave); -+ offs += c->lsave_sz; -+ dbg_chk_lpt_sz(c, 1, c->lsave_sz); -+ } -+ -+ /* Make sure to place LPT's own lprops table */ -+ if (!done_ltab) { -+ if (offs + c->ltab_sz > c->leb_size) { -+ wlen = offs - from; -+ alen = ALIGN(wlen, c->min_io_size); -+ memset(buf + offs, 0xff, alen - wlen); -+ err = ubifs_leb_write(c, lnum, buf + from, from, alen, -+ UBI_SHORTTERM); -+ if (err) -+ return err; -+ dbg_chk_lpt_sz(c, 2, alen - wlen); -+ err = realloc_lpt_leb(c, &lnum); -+ if (err) -+ goto no_space; -+ offs = 0; -+ ubifs_assert(lnum >= c->lpt_first && -+ lnum <= c->lpt_last); -+ err = ubifs_leb_unmap(c, lnum); -+ if (err) -+ return err; -+ } -+ done_ltab = 1; -+ ubifs_pack_ltab(c, buf + offs, c->ltab_cmt); -+ offs += c->ltab_sz; -+ dbg_chk_lpt_sz(c, 1, c->ltab_sz); -+ } -+ -+ /* Write remaining data in buffer */ -+ wlen = offs - from; -+ alen = ALIGN(wlen, c->min_io_size); -+ memset(buf + offs, 0xff, alen - wlen); -+ err = ubifs_leb_write(c, lnum, buf + from, from, alen, UBI_SHORTTERM); -+ if (err) -+ return err; -+ -+ dbg_chk_lpt_sz(c, 4, alen - wlen); -+ err = dbg_chk_lpt_sz(c, 3, 
ALIGN(offs, c->min_io_size)); -+ if (err) -+ return err; -+ -+ c->nhead_lnum = lnum; -+ c->nhead_offs = ALIGN(offs, c->min_io_size); -+ -+ dbg_lp("LPT root is at %d:%d", c->lpt_lnum, c->lpt_offs); -+ dbg_lp("LPT head is at %d:%d", c->nhead_lnum, c->nhead_offs); -+ dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs); -+ if (c->big_lpt) -+ dbg_lp("LPT lsave is at %d:%d", c->lsave_lnum, c->lsave_offs); -+ -+ return 0; -+ -+no_space: -+ ubifs_err("LPT out of space mismatch"); -+ dbg_err("LPT out of space mismatch at LEB %d:%d needing %d, done_ltab " -+ "%d, done_lsave %d", lnum, offs, len, done_ltab, done_lsave); -+ dbg_dump_lpt_info(c); -+ dbg_dump_lpt_lebs(c); -+ dump_stack(); -+ return err; -+} -+ -+/** -+ * next_pnode_to_dirty - find next pnode to dirty. -+ * @c: UBIFS file-system description object -+ * @pnode: pnode -+ * -+ * This function returns the next pnode to dirty or %NULL if there are no more -+ * pnodes. Note that pnodes that have never been written (lnum == 0) are -+ * skipped. -+ */ -+static struct ubifs_pnode *next_pnode_to_dirty(struct ubifs_info *c, -+ struct ubifs_pnode *pnode) -+{ -+ struct ubifs_nnode *nnode; -+ int iip; -+ -+ /* Try to go right */ -+ nnode = pnode->parent; -+ for (iip = pnode->iip + 1; iip < UBIFS_LPT_FANOUT; iip++) { -+ if (nnode->nbranch[iip].lnum) -+ return ubifs_get_pnode(c, nnode, iip); -+ } -+ -+ /* Go up while can't go right */ -+ do { -+ iip = nnode->iip + 1; -+ nnode = nnode->parent; -+ if (!nnode) -+ return NULL; -+ for (; iip < UBIFS_LPT_FANOUT; iip++) { -+ if (nnode->nbranch[iip].lnum) -+ break; -+ } -+ } while (iip >= UBIFS_LPT_FANOUT); -+ -+ /* Go right */ -+ nnode = ubifs_get_nnode(c, nnode, iip); -+ if (IS_ERR(nnode)) -+ return (void *)nnode; -+ -+ /* Go down to level 1 */ -+ while (nnode->level > 1) { -+ for (iip = 0; iip < UBIFS_LPT_FANOUT; iip++) { -+ if (nnode->nbranch[iip].lnum) -+ break; -+ } -+ if (iip >= UBIFS_LPT_FANOUT) { -+ /* -+ * Should not happen, but we need to keep going -+ * if it does. 
-+ */ -+ iip = 0; -+ } -+ nnode = ubifs_get_nnode(c, nnode, iip); -+ if (IS_ERR(nnode)) -+ return (void *)nnode; -+ } -+ -+ for (iip = 0; iip < UBIFS_LPT_FANOUT; iip++) -+ if (nnode->nbranch[iip].lnum) -+ break; -+ if (iip >= UBIFS_LPT_FANOUT) -+ /* Should not happen, but we need to keep going if it does */ -+ iip = 0; -+ return ubifs_get_pnode(c, nnode, iip); -+} -+ -+/** -+ * pnode_lookup - lookup a pnode in the LPT. -+ * @c: UBIFS file-system description object -+ * @i: pnode number (0 to main_lebs - 1) -+ * -+ * This function returns a pointer to the pnode on success or a negative -+ * error code on failure. -+ */ -+static struct ubifs_pnode *pnode_lookup(struct ubifs_info *c, int i) -+{ -+ int err, h, iip, shft; -+ struct ubifs_nnode *nnode; -+ -+ if (!c->nroot) { -+ err = ubifs_read_nnode(c, NULL, 0); -+ if (err) -+ return ERR_PTR(err); -+ } -+ i <<= UBIFS_LPT_FANOUT_SHIFT; -+ nnode = c->nroot; -+ shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT; -+ for (h = 1; h < c->lpt_hght; h++) { -+ iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); -+ shft -= UBIFS_LPT_FANOUT_SHIFT; -+ nnode = ubifs_get_nnode(c, nnode, iip); -+ if (IS_ERR(nnode)) -+ return ERR_PTR(PTR_ERR(nnode)); -+ } -+ iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); -+ return ubifs_get_pnode(c, nnode, iip); -+} -+ -+/** -+ * add_pnode_dirt - add dirty space to LPT LEB properties. -+ * @c: UBIFS file-system description object -+ * @pnode: pnode for which to add dirt -+ */ -+static void add_pnode_dirt(struct ubifs_info *c, struct ubifs_pnode *pnode) -+{ -+ ubifs_add_lpt_dirt(c, pnode->parent->nbranch[pnode->iip].lnum, -+ c->pnode_sz); -+} -+ -+/** -+ * do_make_pnode_dirty - mark a pnode dirty. -+ * @c: UBIFS file-system description object -+ * @pnode: pnode to mark dirty -+ */ -+static void do_make_pnode_dirty(struct ubifs_info *c, struct ubifs_pnode *pnode) -+{ -+ /* Assumes cnext list is empty i.e. 
not called during commit */ -+ if (!test_and_set_bit(DIRTY_CNODE, &pnode->flags)) { -+ struct ubifs_nnode *nnode; -+ -+ c->dirty_pn_cnt += 1; -+ add_pnode_dirt(c, pnode); -+ /* Mark parent and ancestors dirty too */ -+ nnode = pnode->parent; -+ while (nnode) { -+ if (!test_and_set_bit(DIRTY_CNODE, &nnode->flags)) { -+ c->dirty_nn_cnt += 1; -+ ubifs_add_nnode_dirt(c, nnode); -+ nnode = nnode->parent; -+ } else -+ break; -+ } -+ } -+} -+ -+/** -+ * make_tree_dirty - mark the entire LEB properties tree dirty. -+ * @c: UBIFS file-system description object -+ * -+ * This function is used by the "small" LPT model to cause the entire LEB -+ * properties tree to be written. The "small" LPT model does not use LPT -+ * garbage collection because it is more efficient to write the entire tree -+ * (because it is small). -+ * -+ * This function returns %0 on success and a negative error code on failure. -+ */ -+static int make_tree_dirty(struct ubifs_info *c) -+{ -+ struct ubifs_pnode *pnode; -+ -+ pnode = pnode_lookup(c, 0); -+ while (pnode) { -+ do_make_pnode_dirty(c, pnode); -+ pnode = next_pnode_to_dirty(c, pnode); -+ if (IS_ERR(pnode)) -+ return PTR_ERR(pnode); -+ } -+ return 0; -+} -+ -+/** -+ * need_write_all - determine if the LPT area is running out of free space. -+ * @c: UBIFS file-system description object -+ * -+ * This function returns %1 if the LPT area is running out of free space and %0 -+ * if it is not. -+ */ -+static int need_write_all(struct ubifs_info *c) -+{ -+ long long free = 0; -+ int i; -+ -+ for (i = 0; i < c->lpt_lebs; i++) { -+ if (i + c->lpt_first == c->nhead_lnum) -+ free += c->leb_size - c->nhead_offs; -+ else if (c->ltab[i].free == c->leb_size) -+ free += c->leb_size; -+ else if (c->ltab[i].free + c->ltab[i].dirty == c->leb_size) -+ free += c->leb_size; -+ } -+ /* Less than twice the size left */ -+ if (free <= c->lpt_sz * 2) -+ return 1; -+ return 0; -+} -+ -+/** -+ * lpt_tgc_start - start trivial garbage collection of LPT LEBs. 
-+ * @c: UBIFS file-system description object -+ * -+ * LPT trivial garbage collection is where a LPT LEB contains only dirty and -+ * free space and so may be reused as soon as the next commit is completed. -+ * This function is called during start commit to mark LPT LEBs for trivial GC. -+ */ -+static void lpt_tgc_start(struct ubifs_info *c) -+{ -+ int i; -+ -+ for (i = 0; i < c->lpt_lebs; i++) { -+ if (i + c->lpt_first == c->nhead_lnum) -+ continue; -+ if (c->ltab[i].dirty > 0 && -+ c->ltab[i].free + c->ltab[i].dirty == c->leb_size) { -+ c->ltab[i].tgc = 1; -+ c->ltab[i].free = c->leb_size; -+ c->ltab[i].dirty = 0; -+ dbg_lp("LEB %d", i + c->lpt_first); -+ } -+ } -+} -+ -+/** -+ * lpt_tgc_end - end trivial garbage collection of LPT LEBs. -+ * @c: UBIFS file-system description object -+ * -+ * LPT trivial garbage collection is where a LPT LEB contains only dirty and -+ * free space and so may be reused as soon as the next commit is completed. -+ * This function is called after the commit is completed (master node has been -+ * written) and un-maps LPT LEBs that were marked for trivial GC. -+ */ -+static int lpt_tgc_end(struct ubifs_info *c) -+{ -+ int i, err; -+ -+ for (i = 0; i < c->lpt_lebs; i++) -+ if (c->ltab[i].tgc) { -+ err = ubifs_leb_unmap(c, i + c->lpt_first); -+ if (err) -+ return err; -+ c->ltab[i].tgc = 0; -+ dbg_lp("LEB %d", i + c->lpt_first); -+ } -+ return 0; -+} -+ -+/** -+ * populate_lsave - fill the lsave array with important LEB numbers. -+ * @c: the UBIFS file-system description object -+ * -+ * This function is only called for the "big" model. It records a small number -+ * of LEB numbers of important LEBs. Important LEBs are ones that are (from -+ * most important to least important): empty, freeable, freeable index, dirty -+ * index, dirty or free. Upon mount, we read this list of LEB numbers and bring -+ * their pnodes into memory. That will stop us from having to scan the LPT -+ * straight away. 
For the "small" model we assume that scanning the LPT is no -+ * big deal. -+ */ -+static void populate_lsave(struct ubifs_info *c) -+{ -+ struct ubifs_lprops *lprops; -+ struct ubifs_lpt_heap *heap; -+ int i, cnt = 0; -+ -+ ubifs_assert(c->big_lpt); -+ if (!(c->lpt_drty_flgs & LSAVE_DIRTY)) { -+ c->lpt_drty_flgs |= LSAVE_DIRTY; -+ ubifs_add_lpt_dirt(c, c->lsave_lnum, c->lsave_sz); -+ } -+ list_for_each_entry(lprops, &c->empty_list, list) { -+ c->lsave[cnt++] = lprops->lnum; -+ if (cnt >= c->lsave_cnt) -+ return; -+ } -+ list_for_each_entry(lprops, &c->freeable_list, list) { -+ c->lsave[cnt++] = lprops->lnum; -+ if (cnt >= c->lsave_cnt) -+ return; -+ } -+ list_for_each_entry(lprops, &c->frdi_idx_list, list) { -+ c->lsave[cnt++] = lprops->lnum; -+ if (cnt >= c->lsave_cnt) -+ return; -+ } -+ heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1]; -+ for (i = 0; i < heap->cnt; i++) { -+ c->lsave[cnt++] = heap->arr[i]->lnum; -+ if (cnt >= c->lsave_cnt) -+ return; -+ } -+ heap = &c->lpt_heap[LPROPS_DIRTY - 1]; -+ for (i = 0; i < heap->cnt; i++) { -+ c->lsave[cnt++] = heap->arr[i]->lnum; -+ if (cnt >= c->lsave_cnt) -+ return; -+ } -+ heap = &c->lpt_heap[LPROPS_FREE - 1]; -+ for (i = 0; i < heap->cnt; i++) { -+ c->lsave[cnt++] = heap->arr[i]->lnum; -+ if (cnt >= c->lsave_cnt) -+ return; -+ } -+ /* Fill it up completely */ -+ while (cnt < c->lsave_cnt) -+ c->lsave[cnt++] = c->main_first; -+} -+ -+/** -+ * nnode_lookup - lookup a nnode in the LPT. -+ * @c: UBIFS file-system description object -+ * @i: nnode number -+ * -+ * This function returns a pointer to the nnode on success or a negative -+ * error code on failure. 
-+ */ -+static struct ubifs_nnode *nnode_lookup(struct ubifs_info *c, int i) -+{ -+ int err, iip; -+ struct ubifs_nnode *nnode; -+ -+ if (!c->nroot) { -+ err = ubifs_read_nnode(c, NULL, 0); -+ if (err) -+ return ERR_PTR(err); -+ } -+ nnode = c->nroot; -+ while (1) { -+ iip = i & (UBIFS_LPT_FANOUT - 1); -+ i >>= UBIFS_LPT_FANOUT_SHIFT; -+ if (!i) -+ break; -+ nnode = ubifs_get_nnode(c, nnode, iip); -+ if (IS_ERR(nnode)) -+ return nnode; -+ } -+ return nnode; -+} -+ -+/** -+ * make_nnode_dirty - find a nnode and, if found, make it dirty. -+ * @c: UBIFS file-system description object -+ * @node_num: nnode number of nnode to make dirty -+ * @lnum: LEB number where nnode was written -+ * @offs: offset where nnode was written -+ * -+ * This function is used by LPT garbage collection. LPT garbage collection is -+ * used only for the "big" LPT model (c->big_lpt == 1). Garbage collection -+ * simply involves marking all the nodes in the LEB being garbage-collected as -+ * dirty. The dirty nodes are written next commit, after which the LEB is free -+ * to be reused. -+ * -+ * This function returns %0 on success and a negative error code on failure. -+ */ -+static int make_nnode_dirty(struct ubifs_info *c, int node_num, int lnum, -+ int offs) -+{ -+ struct ubifs_nnode *nnode; -+ -+ nnode = nnode_lookup(c, node_num); -+ if (IS_ERR(nnode)) -+ return PTR_ERR(nnode); -+ if (nnode->parent) { -+ struct ubifs_nbranch *branch; -+ -+ branch = &nnode->parent->nbranch[nnode->iip]; -+ if (branch->lnum != lnum || branch->offs != offs) -+ return 0; /* nnode is obsolete */ -+ } else if (c->lpt_lnum != lnum || c->lpt_offs != offs) -+ return 0; /* nnode is obsolete */ -+ /* Assumes cnext list is empty i.e. 
not called during commit */ -+ if (!test_and_set_bit(DIRTY_CNODE, &nnode->flags)) { -+ c->dirty_nn_cnt += 1; -+ ubifs_add_nnode_dirt(c, nnode); -+ /* Mark parent and ancestors dirty too */ -+ nnode = nnode->parent; -+ while (nnode) { -+ if (!test_and_set_bit(DIRTY_CNODE, &nnode->flags)) { -+ c->dirty_nn_cnt += 1; -+ ubifs_add_nnode_dirt(c, nnode); -+ nnode = nnode->parent; -+ } else -+ break; -+ } -+ } -+ return 0; -+} -+ -+/** -+ * make_pnode_dirty - find a pnode and, if found, make it dirty. -+ * @c: UBIFS file-system description object -+ * @node_num: pnode number of pnode to make dirty -+ * @lnum: LEB number where pnode was written -+ * @offs: offset where pnode was written -+ * -+ * This function is used by LPT garbage collection. LPT garbage collection is -+ * used only for the "big" LPT model (c->big_lpt == 1). Garbage collection -+ * simply involves marking all the nodes in the LEB being garbage-collected as -+ * dirty. The dirty nodes are written next commit, after which the LEB is free -+ * to be reused. -+ * -+ * This function returns %0 on success and a negative error code on failure. -+ */ -+static int make_pnode_dirty(struct ubifs_info *c, int node_num, int lnum, -+ int offs) -+{ -+ struct ubifs_pnode *pnode; -+ struct ubifs_nbranch *branch; -+ -+ pnode = pnode_lookup(c, node_num); -+ if (IS_ERR(pnode)) -+ return PTR_ERR(pnode); -+ branch = &pnode->parent->nbranch[pnode->iip]; -+ if (branch->lnum != lnum || branch->offs != offs) -+ return 0; -+ do_make_pnode_dirty(c, pnode); -+ return 0; -+} -+ -+/** -+ * make_ltab_dirty - make ltab node dirty. -+ * @c: UBIFS file-system description object -+ * @lnum: LEB number where ltab was written -+ * @offs: offset where ltab was written -+ * -+ * This function is used by LPT garbage collection. LPT garbage collection is -+ * used only for the "big" LPT model (c->big_lpt == 1). Garbage collection -+ * simply involves marking all the nodes in the LEB being garbage-collected as -+ * dirty. 
The dirty nodes are written next commit, after which the LEB is free -+ * to be reused. -+ * -+ * This function returns %0 on success and a negative error code on failure. -+ */ -+static int make_ltab_dirty(struct ubifs_info *c, int lnum, int offs) -+{ -+ if (lnum != c->ltab_lnum || offs != c->ltab_offs) -+ return 0; /* This ltab node is obsolete */ -+ if (!(c->lpt_drty_flgs & LTAB_DIRTY)) { -+ c->lpt_drty_flgs |= LTAB_DIRTY; -+ ubifs_add_lpt_dirt(c, c->ltab_lnum, c->ltab_sz); -+ } -+ return 0; -+} -+ -+/** -+ * make_lsave_dirty - make lsave node dirty. -+ * @c: UBIFS file-system description object -+ * @lnum: LEB number where lsave was written -+ * @offs: offset where lsave was written -+ * -+ * This function is used by LPT garbage collection. LPT garbage collection is -+ * used only for the "big" LPT model (c->big_lpt == 1). Garbage collection -+ * simply involves marking all the nodes in the LEB being garbage-collected as -+ * dirty. The dirty nodes are written next commit, after which the LEB is free -+ * to be reused. -+ * -+ * This function returns %0 on success and a negative error code on failure. -+ */ -+static int make_lsave_dirty(struct ubifs_info *c, int lnum, int offs) -+{ -+ if (lnum != c->lsave_lnum || offs != c->lsave_offs) -+ return 0; /* This lsave node is obsolete */ -+ if (!(c->lpt_drty_flgs & LSAVE_DIRTY)) { -+ c->lpt_drty_flgs |= LSAVE_DIRTY; -+ ubifs_add_lpt_dirt(c, c->lsave_lnum, c->lsave_sz); -+ } -+ return 0; -+} -+ -+/** -+ * make_node_dirty - make node dirty. -+ * @c: UBIFS file-system description object -+ * @node_type: LPT node type -+ * @node_num: node number -+ * @lnum: LEB number where node was written -+ * @offs: offset where node was written -+ * -+ * This function is used by LPT garbage collection. LPT garbage collection is -+ * used only for the "big" LPT model (c->big_lpt == 1). Garbage collection -+ * simply involves marking all the nodes in the LEB being garbage-collected as -+ * dirty. 
The dirty nodes are written next commit, after which the LEB is free -+ * to be reused. -+ * -+ * This function returns %0 on success and a negative error code on failure. -+ */ -+static int make_node_dirty(struct ubifs_info *c, int node_type, int node_num, -+ int lnum, int offs) -+{ -+ switch (node_type) { -+ case UBIFS_LPT_NNODE: -+ return make_nnode_dirty(c, node_num, lnum, offs); -+ case UBIFS_LPT_PNODE: -+ return make_pnode_dirty(c, node_num, lnum, offs); -+ case UBIFS_LPT_LTAB: -+ return make_ltab_dirty(c, lnum, offs); -+ case UBIFS_LPT_LSAVE: -+ return make_lsave_dirty(c, lnum, offs); -+ } -+ return -EINVAL; -+} -+ -+/** -+ * get_lpt_node_len - return the length of a node based on its type. -+ * @c: UBIFS file-system description object -+ * @node_type: LPT node type -+ */ -+static int get_lpt_node_len(const struct ubifs_info *c, int node_type) -+{ -+ switch (node_type) { -+ case UBIFS_LPT_NNODE: -+ return c->nnode_sz; -+ case UBIFS_LPT_PNODE: -+ return c->pnode_sz; -+ case UBIFS_LPT_LTAB: -+ return c->ltab_sz; -+ case UBIFS_LPT_LSAVE: -+ return c->lsave_sz; -+ } -+ return 0; -+} -+ -+/** -+ * get_pad_len - return the length of padding in a buffer. -+ * @c: UBIFS file-system description object -+ * @buf: buffer -+ * @len: length of buffer -+ */ -+static int get_pad_len(const struct ubifs_info *c, uint8_t *buf, int len) -+{ -+ int offs, pad_len; -+ -+ if (c->min_io_size == 1) -+ return 0; -+ offs = c->leb_size - len; -+ pad_len = ALIGN(offs, c->min_io_size) - offs; -+ return pad_len; -+} -+ -+/** -+ * get_lpt_node_type - return type (and node number) of a node in a buffer. 
-+ * @c: UBIFS file-system description object -+ * @buf: buffer -+ * @node_num: node number is returned here -+ */ -+static int get_lpt_node_type(const struct ubifs_info *c, uint8_t *buf, -+ int *node_num) -+{ -+ uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; -+ int pos = 0, node_type; -+ -+ node_type = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_TYPE_BITS); -+ *node_num = ubifs_unpack_bits(&addr, &pos, c->pcnt_bits); -+ return node_type; -+} -+ -+/** -+ * is_a_node - determine if a buffer contains a node. -+ * @c: UBIFS file-system description object -+ * @buf: buffer -+ * @len: length of buffer -+ * -+ * This function returns %1 if the buffer contains a node or %0 if it does not. -+ */ -+static int is_a_node(const struct ubifs_info *c, uint8_t *buf, int len) -+{ -+ uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; -+ int pos = 0, node_type, node_len; -+ uint16_t crc, calc_crc; -+ -+ if (len < UBIFS_LPT_CRC_BYTES + (UBIFS_LPT_TYPE_BITS + 7) / 8) -+ return 0; -+ node_type = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_TYPE_BITS); -+ if (node_type == UBIFS_LPT_NOT_A_NODE) -+ return 0; -+ node_len = get_lpt_node_len(c, node_type); -+ if (!node_len || node_len > len) -+ return 0; -+ pos = 0; -+ addr = buf; -+ crc = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_CRC_BITS); -+ calc_crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES, -+ node_len - UBIFS_LPT_CRC_BYTES); -+ if (crc != calc_crc) -+ return 0; -+ return 1; -+} -+ -+/** -+ * lpt_gc_lnum - garbage collect a LPT LEB. -+ * @c: UBIFS file-system description object -+ * @lnum: LEB number to garbage collect -+ * -+ * LPT garbage collection is used only for the "big" LPT model -+ * (c->big_lpt == 1). Garbage collection simply involves marking all the nodes -+ * in the LEB being garbage-collected as dirty. The dirty nodes are written -+ * next commit, after which the LEB is free to be reused. -+ * -+ * This function returns %0 on success and a negative error code on failure. 
-+ */ -+static int lpt_gc_lnum(struct ubifs_info *c, int lnum) -+{ -+ int err, len = c->leb_size, node_type, node_num, node_len, offs; -+ void *buf = c->lpt_buf; -+ -+ dbg_lp("LEB %d", lnum); -+ err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); -+ if (err) { -+ ubifs_err("cannot read LEB %d, error %d", lnum, err); -+ return err; -+ } -+ while (1) { -+ if (!is_a_node(c, buf, len)) { -+ int pad_len; -+ -+ pad_len = get_pad_len(c, buf, len); -+ if (pad_len) { -+ buf += pad_len; -+ len -= pad_len; -+ continue; -+ } -+ return 0; -+ } -+ node_type = get_lpt_node_type(c, buf, &node_num); -+ node_len = get_lpt_node_len(c, node_type); -+ offs = c->leb_size - len; -+ ubifs_assert(node_len != 0); -+ mutex_lock(&c->lp_mutex); -+ err = make_node_dirty(c, node_type, node_num, lnum, offs); -+ mutex_unlock(&c->lp_mutex); -+ if (err) -+ return err; -+ buf += node_len; -+ len -= node_len; -+ } -+ return 0; -+} -+ -+/** -+ * lpt_gc - LPT garbage collection. -+ * @c: UBIFS file-system description object -+ * -+ * Select a LPT LEB for LPT garbage collection and call 'lpt_gc_lnum()'. -+ * Returns %0 on success and a negative error code on failure. -+ */ -+static int lpt_gc(struct ubifs_info *c) -+{ -+ int i, lnum = -1, dirty = 0; -+ -+ mutex_lock(&c->lp_mutex); -+ for (i = 0; i < c->lpt_lebs; i++) { -+ ubifs_assert(!c->ltab[i].tgc); -+ if (i + c->lpt_first == c->nhead_lnum || -+ c->ltab[i].free + c->ltab[i].dirty == c->leb_size) -+ continue; -+ if (c->ltab[i].dirty > dirty) { -+ dirty = c->ltab[i].dirty; -+ lnum = i + c->lpt_first; -+ } -+ } -+ mutex_unlock(&c->lp_mutex); -+ if (lnum == -1) -+ return -ENOSPC; -+ return lpt_gc_lnum(c, lnum); -+} -+ -+/** -+ * ubifs_lpt_start_commit - UBIFS commit starts. -+ * @c: the UBIFS file-system description object -+ * -+ * This function has to be called when UBIFS starts the commit operation. -+ * This function "freezes" all currently dirty LEB properties and does not -+ * change them anymore. 
Further changes are saved and tracked separately -+ * because they are not part of this commit. This function returns zero in case -+ * of success and a negative error code in case of failure. -+ */ -+int ubifs_lpt_start_commit(struct ubifs_info *c) -+{ -+ int err, cnt; -+ -+ dbg_lp(""); -+ -+ mutex_lock(&c->lp_mutex); -+ err = dbg_chk_lpt_free_spc(c); -+ if (err) -+ goto out; -+ err = dbg_check_ltab(c); -+ if (err) -+ goto out; -+ -+ if (c->check_lpt_free) { -+ /* -+ * We ensure there is enough free space in -+ * ubifs_lpt_post_commit() by marking nodes dirty. That -+ * information is lost when we unmount, so we also need -+ * to check free space once after mounting also. -+ */ -+ c->check_lpt_free = 0; -+ while (need_write_all(c)) { -+ mutex_unlock(&c->lp_mutex); -+ err = lpt_gc(c); -+ if (err) -+ return err; -+ mutex_lock(&c->lp_mutex); -+ } -+ } -+ -+ lpt_tgc_start(c); -+ -+ if (!c->dirty_pn_cnt) { -+ dbg_cmt("no cnodes to commit"); -+ err = 0; -+ goto out; -+ } -+ -+ if (!c->big_lpt && need_write_all(c)) { -+ /* If needed, write everything */ -+ err = make_tree_dirty(c); -+ if (err) -+ goto out; -+ lpt_tgc_start(c); -+ } -+ -+ if (c->big_lpt) -+ populate_lsave(c); -+ -+ cnt = get_cnodes_to_commit(c); -+ ubifs_assert(cnt != 0); -+ -+ err = layout_cnodes(c); -+ if (err) -+ goto out; -+ -+ /* Copy the LPT's own lprops for end commit to write */ -+ memcpy(c->ltab_cmt, c->ltab, -+ sizeof(struct ubifs_lpt_lprops) * c->lpt_lebs); -+ c->lpt_drty_flgs &= ~(LTAB_DIRTY | LSAVE_DIRTY); -+ -+out: -+ mutex_unlock(&c->lp_mutex); -+ return err; -+} -+ -+/** -+ * free_obsolete_cnodes - free obsolete cnodes for commit end. 
-+ * @c: UBIFS file-system description object -+ */ -+static void free_obsolete_cnodes(struct ubifs_info *c) -+{ -+ struct ubifs_cnode *cnode, *cnext; -+ -+ cnext = c->lpt_cnext; -+ if (!cnext) -+ return; -+ do { -+ cnode = cnext; -+ cnext = cnode->cnext; -+ if (test_bit(OBSOLETE_CNODE, &cnode->flags)) -+ kfree(cnode); -+ else -+ cnode->cnext = NULL; -+ } while (cnext != c->lpt_cnext); -+ c->lpt_cnext = NULL; -+} -+ -+/** -+ * ubifs_lpt_end_commit - finish the commit operation. -+ * @c: the UBIFS file-system description object -+ * -+ * This function has to be called when the commit operation finishes. It -+ * flushes the changes which were "frozen" by 'ubifs_lprops_start_commit()' to -+ * the media. Returns zero in case of success and a negative error code in case -+ * of failure. -+ */ -+int ubifs_lpt_end_commit(struct ubifs_info *c) -+{ -+ int err; -+ -+ dbg_lp(""); -+ -+ if (!c->lpt_cnext) -+ return 0; -+ -+ err = write_cnodes(c); -+ if (err) -+ return err; -+ -+ mutex_lock(&c->lp_mutex); -+ free_obsolete_cnodes(c); -+ mutex_unlock(&c->lp_mutex); -+ -+ return 0; -+} -+ -+/** -+ * ubifs_lpt_post_commit - post commit LPT trivial GC and LPT GC. -+ * @c: UBIFS file-system description object -+ * -+ * LPT trivial GC is completed after a commit. Also LPT GC is done after a -+ * commit for the "big" LPT model. -+ */ -+int ubifs_lpt_post_commit(struct ubifs_info *c) -+{ -+ int err; -+ -+ mutex_lock(&c->lp_mutex); -+ err = lpt_tgc_end(c); -+ if (err) -+ goto out; -+ if (c->big_lpt) -+ while (need_write_all(c)) { -+ mutex_unlock(&c->lp_mutex); -+ err = lpt_gc(c); -+ if (err) -+ return err; -+ mutex_lock(&c->lp_mutex); -+ } -+out: -+ mutex_unlock(&c->lp_mutex); -+ return err; -+} -+ -+/** -+ * first_nnode - find the first nnode in memory. -+ * @c: UBIFS file-system description object -+ * @hght: height of tree where nnode found is returned here -+ * -+ * This function returns a pointer to the nnode found or %NULL if no nnode is -+ * found. 
This function is a helper to 'ubifs_lpt_free()'. -+ */ -+static struct ubifs_nnode *first_nnode(struct ubifs_info *c, int *hght) -+{ -+ struct ubifs_nnode *nnode; -+ int h, i, found; -+ -+ nnode = c->nroot; -+ *hght = 0; -+ if (!nnode) -+ return NULL; -+ for (h = 1; h < c->lpt_hght; h++) { -+ found = 0; -+ for (i = 0; i < UBIFS_LPT_FANOUT; i++) { -+ if (nnode->nbranch[i].nnode) { -+ found = 1; -+ nnode = nnode->nbranch[i].nnode; -+ *hght = h; -+ break; -+ } -+ } -+ if (!found) -+ break; -+ } -+ return nnode; -+} -+ -+/** -+ * next_nnode - find the next nnode in memory. -+ * @c: UBIFS file-system description object -+ * @nnode: nnode from which to start. -+ * @hght: height of tree where nnode is, is passed and returned here -+ * -+ * This function returns a pointer to the nnode found or %NULL if no nnode is -+ * found. This function is a helper to 'ubifs_lpt_free()'. -+ */ -+static struct ubifs_nnode *next_nnode(struct ubifs_info *c, -+ struct ubifs_nnode *nnode, int *hght) -+{ -+ struct ubifs_nnode *parent; -+ int iip, h, i, found; -+ -+ parent = nnode->parent; -+ if (!parent) -+ return NULL; -+ if (nnode->iip == UBIFS_LPT_FANOUT - 1) { -+ *hght -= 1; -+ return parent; -+ } -+ for (iip = nnode->iip + 1; iip < UBIFS_LPT_FANOUT; iip++) { -+ nnode = parent->nbranch[iip].nnode; -+ if (nnode) -+ break; -+ } -+ if (!nnode) { -+ *hght -= 1; -+ return parent; -+ } -+ for (h = *hght + 1; h < c->lpt_hght; h++) { -+ found = 0; -+ for (i = 0; i < UBIFS_LPT_FANOUT; i++) { -+ if (nnode->nbranch[i].nnode) { -+ found = 1; -+ nnode = nnode->nbranch[i].nnode; -+ *hght = h; -+ break; -+ } -+ } -+ if (!found) -+ break; -+ } -+ return nnode; -+} -+ -+/** -+ * ubifs_lpt_free - free resources owned by the LPT. 
-+ * @c: UBIFS file-system description object -+ * @wr_only: free only resources used for writing -+ */ -+void ubifs_lpt_free(struct ubifs_info *c, int wr_only) -+{ -+ struct ubifs_nnode *nnode; -+ int i, hght; -+ -+ /* Free write-only things first */ -+ -+ free_obsolete_cnodes(c); /* Leftover from a failed commit */ -+ -+ vfree(c->ltab_cmt); -+ c->ltab_cmt = NULL; -+ vfree(c->lpt_buf); -+ c->lpt_buf = NULL; -+ kfree(c->lsave); -+ c->lsave = NULL; -+ -+ if (wr_only) -+ return; -+ -+ /* Now free the rest */ -+ -+ nnode = first_nnode(c, &hght); -+ while (nnode) { -+ for (i = 0; i < UBIFS_LPT_FANOUT; i++) -+ kfree(nnode->nbranch[i].nnode); -+ nnode = next_nnode(c, nnode, &hght); -+ } -+ for (i = 0; i < LPROPS_HEAP_CNT; i++) -+ kfree(c->lpt_heap[i].arr); -+ kfree(c->dirty_idx.arr); -+ kfree(c->nroot); -+ vfree(c->ltab); -+ kfree(c->lpt_nod_buf); -+} -+ -+#ifdef CONFIG_UBIFS_FS_DEBUG -+ -+/** -+ * dbg_is_all_ff - determine if a buffer contains only 0xFF bytes. -+ * @buf: buffer -+ * @len: buffer length -+ */ -+static int dbg_is_all_ff(uint8_t *buf, int len) -+{ -+ int i; -+ -+ for (i = 0; i < len; i++) -+ if (buf[i] != 0xff) -+ return 0; -+ return 1; -+} -+ -+/** -+ * dbg_is_nnode_dirty - determine if a nnode is dirty. 
-+ * @c: the UBIFS file-system description object -+ * @lnum: LEB number where nnode was written -+ * @offs: offset where nnode was written -+ */ -+static int dbg_is_nnode_dirty(struct ubifs_info *c, int lnum, int offs) -+{ -+ struct ubifs_nnode *nnode; -+ int hght; -+ -+ /* Entire tree is in memory so first_nnode / next_nnode are OK */ -+ nnode = first_nnode(c, &hght); -+ for (; nnode; nnode = next_nnode(c, nnode, &hght)) { -+ struct ubifs_nbranch *branch; -+ -+ cond_resched(); -+ if (nnode->parent) { -+ branch = &nnode->parent->nbranch[nnode->iip]; -+ if (branch->lnum != lnum || branch->offs != offs) -+ continue; -+ if (test_bit(DIRTY_CNODE, &nnode->flags)) -+ return 1; -+ return 0; -+ } else { -+ if (c->lpt_lnum != lnum || c->lpt_offs != offs) -+ continue; -+ if (test_bit(DIRTY_CNODE, &nnode->flags)) -+ return 1; -+ return 0; -+ } -+ } -+ return 1; -+} -+ -+/** -+ * dbg_is_pnode_dirty - determine if a pnode is dirty. -+ * @c: the UBIFS file-system description object -+ * @lnum: LEB number where pnode was written -+ * @offs: offset where pnode was written -+ */ -+static int dbg_is_pnode_dirty(struct ubifs_info *c, int lnum, int offs) -+{ -+ int i, cnt; -+ -+ cnt = DIV_ROUND_UP(c->main_lebs, UBIFS_LPT_FANOUT); -+ for (i = 0; i < cnt; i++) { -+ struct ubifs_pnode *pnode; -+ struct ubifs_nbranch *branch; -+ -+ cond_resched(); -+ pnode = pnode_lookup(c, i); -+ if (IS_ERR(pnode)) -+ return PTR_ERR(pnode); -+ branch = &pnode->parent->nbranch[pnode->iip]; -+ if (branch->lnum != lnum || branch->offs != offs) -+ continue; -+ if (test_bit(DIRTY_CNODE, &pnode->flags)) -+ return 1; -+ return 0; -+ } -+ return 1; -+} -+ -+/** -+ * dbg_is_ltab_dirty - determine if a ltab node is dirty. 
-+ * @c: the UBIFS file-system description object -+ * @lnum: LEB number where ltab node was written -+ * @offs: offset where ltab node was written -+ */ -+static int dbg_is_ltab_dirty(struct ubifs_info *c, int lnum, int offs) -+{ -+ if (lnum != c->ltab_lnum || offs != c->ltab_offs) -+ return 1; -+ return (c->lpt_drty_flgs & LTAB_DIRTY) != 0; -+} -+ -+/** -+ * dbg_is_lsave_dirty - determine if a lsave node is dirty. -+ * @c: the UBIFS file-system description object -+ * @lnum: LEB number where lsave node was written -+ * @offs: offset where lsave node was written -+ */ -+static int dbg_is_lsave_dirty(struct ubifs_info *c, int lnum, int offs) -+{ -+ if (lnum != c->lsave_lnum || offs != c->lsave_offs) -+ return 1; -+ return (c->lpt_drty_flgs & LSAVE_DIRTY) != 0; -+} -+ -+/** -+ * dbg_is_node_dirty - determine if a node is dirty. -+ * @c: the UBIFS file-system description object -+ * @node_type: node type -+ * @lnum: LEB number where node was written -+ * @offs: offset where node was written -+ */ -+static int dbg_is_node_dirty(struct ubifs_info *c, int node_type, int lnum, -+ int offs) -+{ -+ switch (node_type) { -+ case UBIFS_LPT_NNODE: -+ return dbg_is_nnode_dirty(c, lnum, offs); -+ case UBIFS_LPT_PNODE: -+ return dbg_is_pnode_dirty(c, lnum, offs); -+ case UBIFS_LPT_LTAB: -+ return dbg_is_ltab_dirty(c, lnum, offs); -+ case UBIFS_LPT_LSAVE: -+ return dbg_is_lsave_dirty(c, lnum, offs); -+ } -+ return 1; -+} -+ -+/** -+ * dbg_check_ltab_lnum - check the ltab for a LPT LEB number. -+ * @c: the UBIFS file-system description object -+ * @lnum: LEB number where node was written -+ * @offs: offset where node was written -+ * -+ * This function returns %0 on success and a negative error code on failure. 
-+ */ -+static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum) -+{ -+ int err, len = c->leb_size, dirty = 0, node_type, node_num, node_len; -+ int ret; -+ void *buf = c->dbg->buf; -+ -+ if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) -+ return 0; -+ -+ dbg_lp("LEB %d", lnum); -+ err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); -+ if (err) { -+ dbg_msg("ubi_read failed, LEB %d, error %d", lnum, err); -+ return err; -+ } -+ while (1) { -+ if (!is_a_node(c, buf, len)) { -+ int i, pad_len; -+ -+ pad_len = get_pad_len(c, buf, len); -+ if (pad_len) { -+ buf += pad_len; -+ len -= pad_len; -+ dirty += pad_len; -+ continue; -+ } -+ if (!dbg_is_all_ff(buf, len)) { -+ dbg_msg("invalid empty space in LEB %d at %d", -+ lnum, c->leb_size - len); -+ err = -EINVAL; -+ } -+ i = lnum - c->lpt_first; -+ if (len != c->ltab[i].free) { -+ dbg_msg("invalid free space in LEB %d " -+ "(free %d, expected %d)", -+ lnum, len, c->ltab[i].free); -+ err = -EINVAL; -+ } -+ if (dirty != c->ltab[i].dirty) { -+ dbg_msg("invalid dirty space in LEB %d " -+ "(dirty %d, expected %d)", -+ lnum, dirty, c->ltab[i].dirty); -+ err = -EINVAL; -+ } -+ return err; -+ } -+ node_type = get_lpt_node_type(c, buf, &node_num); -+ node_len = get_lpt_node_len(c, node_type); -+ ret = dbg_is_node_dirty(c, node_type, lnum, c->leb_size - len); -+ if (ret == 1) -+ dirty += node_len; -+ buf += node_len; -+ len -= node_len; -+ } -+} -+ -+/** -+ * dbg_check_ltab - check the free and dirty space in the ltab. -+ * @c: the UBIFS file-system description object -+ * -+ * This function returns %0 on success and a negative error code on failure. 
-+ */ -+int dbg_check_ltab(struct ubifs_info *c) -+{ -+ int lnum, err, i, cnt; -+ -+ if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) -+ return 0; -+ -+ /* Bring the entire tree into memory */ -+ cnt = DIV_ROUND_UP(c->main_lebs, UBIFS_LPT_FANOUT); -+ for (i = 0; i < cnt; i++) { -+ struct ubifs_pnode *pnode; -+ -+ pnode = pnode_lookup(c, i); -+ if (IS_ERR(pnode)) -+ return PTR_ERR(pnode); -+ cond_resched(); -+ } -+ -+ /* Check nodes */ -+ err = dbg_check_lpt_nodes(c, (struct ubifs_cnode *)c->nroot, 0, 0); -+ if (err) -+ return err; -+ -+ /* Check each LEB */ -+ for (lnum = c->lpt_first; lnum <= c->lpt_last; lnum++) { -+ err = dbg_check_ltab_lnum(c, lnum); -+ if (err) { -+ dbg_err("failed at LEB %d", lnum); -+ return err; -+ } -+ } -+ -+ dbg_lp("succeeded"); -+ return 0; -+} -+ -+/** -+ * dbg_chk_lpt_free_spc - check LPT free space is enough to write entire LPT. -+ * @c: the UBIFS file-system description object -+ * -+ * This function returns %0 on success and a negative error code on failure. -+ */ -+int dbg_chk_lpt_free_spc(struct ubifs_info *c) -+{ -+ long long free = 0; -+ int i; -+ -+ if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) -+ return 0; -+ -+ for (i = 0; i < c->lpt_lebs; i++) { -+ if (c->ltab[i].tgc || c->ltab[i].cmt) -+ continue; -+ if (i + c->lpt_first == c->nhead_lnum) -+ free += c->leb_size - c->nhead_offs; -+ else if (c->ltab[i].free == c->leb_size) -+ free += c->leb_size; -+ } -+ if (free < c->lpt_sz) { -+ dbg_err("LPT space error: free %lld lpt_sz %lld", -+ free, c->lpt_sz); -+ dbg_dump_lpt_info(c); -+ dbg_dump_lpt_lebs(c); -+ dump_stack(); -+ return -EINVAL; -+ } -+ return 0; -+} -+ -+/** -+ * dbg_chk_lpt_sz - check LPT does not write more than LPT size. -+ * @c: the UBIFS file-system description object -+ * @action: action -+ * @len: length written -+ * -+ * This function returns %0 on success and a negative error code on failure. 
-+ */ -+int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len) -+{ -+ struct ubifs_debug_info *d = c->dbg; -+ long long chk_lpt_sz, lpt_sz; -+ int err = 0; -+ -+ if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) -+ return 0; -+ -+ switch (action) { -+ case 0: -+ d->chk_lpt_sz = 0; -+ d->chk_lpt_sz2 = 0; -+ d->chk_lpt_lebs = 0; -+ d->chk_lpt_wastage = 0; -+ if (c->dirty_pn_cnt > c->pnode_cnt) { -+ dbg_err("dirty pnodes %d exceed max %d", -+ c->dirty_pn_cnt, c->pnode_cnt); -+ err = -EINVAL; -+ } -+ if (c->dirty_nn_cnt > c->nnode_cnt) { -+ dbg_err("dirty nnodes %d exceed max %d", -+ c->dirty_nn_cnt, c->nnode_cnt); -+ err = -EINVAL; -+ } -+ return err; -+ case 1: -+ d->chk_lpt_sz += len; -+ return 0; -+ case 2: -+ d->chk_lpt_sz += len; -+ d->chk_lpt_wastage += len; -+ d->chk_lpt_lebs += 1; -+ return 0; -+ case 3: -+ chk_lpt_sz = c->leb_size; -+ chk_lpt_sz *= d->chk_lpt_lebs; -+ chk_lpt_sz += len - c->nhead_offs; -+ if (d->chk_lpt_sz != chk_lpt_sz) { -+ dbg_err("LPT wrote %lld but space used was %lld", -+ d->chk_lpt_sz, chk_lpt_sz); -+ err = -EINVAL; -+ } -+ if (d->chk_lpt_sz > c->lpt_sz) { -+ dbg_err("LPT wrote %lld but lpt_sz is %lld", -+ d->chk_lpt_sz, c->lpt_sz); -+ err = -EINVAL; -+ } -+ if (d->chk_lpt_sz2 && d->chk_lpt_sz != d->chk_lpt_sz2) { -+ dbg_err("LPT layout size %lld but wrote %lld", -+ d->chk_lpt_sz, d->chk_lpt_sz2); -+ err = -EINVAL; -+ } -+ if (d->chk_lpt_sz2 && d->new_nhead_offs != len) { -+ dbg_err("LPT new nhead offs: expected %d was %d", -+ d->new_nhead_offs, len); -+ err = -EINVAL; -+ } -+ lpt_sz = (long long)c->pnode_cnt * c->pnode_sz; -+ lpt_sz += (long long)c->nnode_cnt * c->nnode_sz; -+ lpt_sz += c->ltab_sz; -+ if (c->big_lpt) -+ lpt_sz += c->lsave_sz; -+ if (d->chk_lpt_sz - d->chk_lpt_wastage > lpt_sz) { -+ dbg_err("LPT chk_lpt_sz %lld + waste %lld exceeds %lld", -+ d->chk_lpt_sz, d->chk_lpt_wastage, lpt_sz); -+ err = -EINVAL; -+ } -+ if (err) { -+ dbg_dump_lpt_info(c); -+ dbg_dump_lpt_lebs(c); -+ dump_stack(); -+ } -+ d->chk_lpt_sz2 = 
d->chk_lpt_sz; -+ d->chk_lpt_sz = 0; -+ d->chk_lpt_wastage = 0; -+ d->chk_lpt_lebs = 0; -+ d->new_nhead_offs = len; -+ return err; -+ case 4: -+ d->chk_lpt_sz += len; -+ d->chk_lpt_wastage += len; -+ return 0; -+ default: -+ return -EINVAL; -+ } -+} -+ -+/** -+ * dbg_dump_lpt_leb - dump an LPT LEB. -+ * @c: UBIFS file-system description object -+ * @lnum: LEB number to dump -+ * -+ * This function dumps an LEB from LPT area. Nodes in this area are very -+ * different to nodes in the main area (e.g., they do not have common headers, -+ * they do not have 8-byte alignments, etc), so we have a separate function to -+ * dump LPT area LEBs. Note, LPT has to be locked by the caller. -+ */ -+static void dump_lpt_leb(const struct ubifs_info *c, int lnum) -+{ -+ int err, len = c->leb_size, node_type, node_num, node_len, offs; -+ void *buf = c->dbg->buf; -+ -+ printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", -+ current->pid, lnum); -+ err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); -+ if (err) { -+ ubifs_err("cannot read LEB %d, error %d", lnum, err); -+ return; -+ } -+ while (1) { -+ offs = c->leb_size - len; -+ if (!is_a_node(c, buf, len)) { -+ int pad_len; -+ -+ pad_len = get_pad_len(c, buf, len); -+ if (pad_len) { -+ printk(KERN_DEBUG "LEB %d:%d, pad %d bytes\n", -+ lnum, offs, pad_len); -+ buf += pad_len; -+ len -= pad_len; -+ continue; -+ } -+ if (len) -+ printk(KERN_DEBUG "LEB %d:%d, free %d bytes\n", -+ lnum, offs, len); -+ break; -+ } -+ -+ node_type = get_lpt_node_type(c, buf, &node_num); -+ switch (node_type) { -+ case UBIFS_LPT_PNODE: -+ { -+ node_len = c->pnode_sz; -+ if (c->big_lpt) -+ printk(KERN_DEBUG "LEB %d:%d, pnode num %d\n", -+ lnum, offs, node_num); -+ else -+ printk(KERN_DEBUG "LEB %d:%d, pnode\n", -+ lnum, offs); -+ break; -+ } -+ case UBIFS_LPT_NNODE: -+ { -+ int i; -+ struct ubifs_nnode nnode; -+ -+ node_len = c->nnode_sz; -+ if (c->big_lpt) -+ printk(KERN_DEBUG "LEB %d:%d, nnode num %d, ", -+ lnum, offs, node_num); -+ else -+ 
printk(KERN_DEBUG "LEB %d:%d, nnode, ", -+ lnum, offs); -+ err = ubifs_unpack_nnode(c, buf, &nnode); -+ for (i = 0; i < UBIFS_LPT_FANOUT; i++) { -+ printk("%d:%d", nnode.nbranch[i].lnum, -+ nnode.nbranch[i].offs); -+ if (i != UBIFS_LPT_FANOUT - 1) -+ printk(", "); -+ } -+ printk("\n"); -+ break; -+ } -+ case UBIFS_LPT_LTAB: -+ node_len = c->ltab_sz; -+ printk(KERN_DEBUG "LEB %d:%d, ltab\n", -+ lnum, offs); -+ break; -+ case UBIFS_LPT_LSAVE: -+ node_len = c->lsave_sz; -+ printk(KERN_DEBUG "LEB %d:%d, lsave len\n", lnum, offs); -+ break; -+ default: -+ ubifs_err("LPT node type %d not recognized", node_type); -+ return; -+ } -+ -+ buf += node_len; -+ len -= node_len; -+ } -+ -+ printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n", -+ current->pid, lnum); -+} -+ -+/** -+ * dbg_dump_lpt_lebs - dump LPT lebs. -+ * @c: UBIFS file-system description object -+ * -+ * This function dumps all LPT LEBs. The caller has to make sure the LPT is -+ * locked. -+ */ -+void dbg_dump_lpt_lebs(const struct ubifs_info *c) -+{ -+ int i; -+ -+ printk(KERN_DEBUG "(pid %d) start dumping all LPT LEBs\n", -+ current->pid); -+ for (i = 0; i < c->lpt_lebs; i++) -+ dump_lpt_leb(c, i + c->lpt_first); -+ printk(KERN_DEBUG "(pid %d) finish dumping all LPT LEBs\n", -+ current->pid); -+} -+ -+#endif /* CONFIG_UBIFS_FS_DEBUG */ -diff -Nurd linux-2.6.24.orig/fs/ubifs/master.c linux-2.6.24/fs/ubifs/master.c ---- linux-2.6.24.orig/fs/ubifs/master.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.24/fs/ubifs/master.c 2009-04-17 09:49:28.000000000 +0200 -@@ -0,0 +1,387 @@ -+/* -+ * This file is part of UBIFS. -+ * -+ * Copyright (C) 2006-2008 Nokia Corporation. -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 as published by -+ * the Free Software Foundation. 
-+ * -+ * This program is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -+ * more details. -+ * -+ * You should have received a copy of the GNU General Public License along with -+ * this program; if not, write to the Free Software Foundation, Inc., 51 -+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ * -+ * Authors: Artem Bityutskiy (Битюцкий Артём) -+ * Adrian Hunter -+ */ -+ -+/* This file implements reading and writing the master node */ -+ -+#include "ubifs.h" -+ -+/** -+ * scan_for_master - search the valid master node. -+ * @c: UBIFS file-system description object -+ * -+ * This function scans the master node LEBs and search for the latest master -+ * node. Returns zero in case of success and a negative error code in case of -+ * failure. -+ */ -+static int scan_for_master(struct ubifs_info *c) -+{ -+ struct ubifs_scan_leb *sleb; -+ struct ubifs_scan_node *snod; -+ int lnum, offs = 0, nodes_cnt; -+ -+ lnum = UBIFS_MST_LNUM; -+ -+ sleb = ubifs_scan(c, lnum, 0, c->sbuf); -+ if (IS_ERR(sleb)) -+ return PTR_ERR(sleb); -+ nodes_cnt = sleb->nodes_cnt; -+ if (nodes_cnt > 0) { -+ snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, -+ list); -+ if (snod->type != UBIFS_MST_NODE) -+ goto out; -+ memcpy(c->mst_node, snod->node, snod->len); -+ offs = snod->offs; -+ } -+ ubifs_scan_destroy(sleb); -+ -+ lnum += 1; -+ -+ sleb = ubifs_scan(c, lnum, 0, c->sbuf); -+ if (IS_ERR(sleb)) -+ return PTR_ERR(sleb); -+ if (sleb->nodes_cnt != nodes_cnt) -+ goto out; -+ if (!sleb->nodes_cnt) -+ goto out; -+ snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, list); -+ if (snod->type != UBIFS_MST_NODE) -+ goto out; -+ if (snod->offs != offs) -+ goto out; -+ if (memcmp((void *)c->mst_node + UBIFS_CH_SZ, -+ (void *)snod->node + UBIFS_CH_SZ, -+ UBIFS_MST_NODE_SZ - UBIFS_CH_SZ)) -+ goto out; -+ 
c->mst_offs = offs; -+ ubifs_scan_destroy(sleb); -+ return 0; -+ -+out: -+ ubifs_scan_destroy(sleb); -+ return -EINVAL; -+} -+ -+/** -+ * validate_master - validate master node. -+ * @c: UBIFS file-system description object -+ * -+ * This function validates data which was read from master node. Returns zero -+ * if the data is all right and %-EINVAL if not. -+ */ -+static int validate_master(const struct ubifs_info *c) -+{ -+ long long main_sz; -+ int err; -+ -+ if (c->max_sqnum >= SQNUM_WATERMARK) { -+ err = 1; -+ goto out; -+ } -+ -+ if (c->cmt_no >= c->max_sqnum) { -+ err = 2; -+ goto out; -+ } -+ -+ if (c->highest_inum >= INUM_WATERMARK) { -+ err = 3; -+ goto out; -+ } -+ -+ if (c->lhead_lnum < UBIFS_LOG_LNUM || -+ c->lhead_lnum >= UBIFS_LOG_LNUM + c->log_lebs || -+ c->lhead_offs < 0 || c->lhead_offs >= c->leb_size || -+ c->lhead_offs & (c->min_io_size - 1)) { -+ err = 4; -+ goto out; -+ } -+ -+ if (c->zroot.lnum >= c->leb_cnt || c->zroot.lnum < c->main_first || -+ c->zroot.offs >= c->leb_size || c->zroot.offs & 7) { -+ err = 5; -+ goto out; -+ } -+ -+ if (c->zroot.len < c->ranges[UBIFS_IDX_NODE].min_len || -+ c->zroot.len > c->ranges[UBIFS_IDX_NODE].max_len) { -+ err = 6; -+ goto out; -+ } -+ -+ if (c->gc_lnum >= c->leb_cnt || c->gc_lnum < c->main_first) { -+ err = 7; -+ goto out; -+ } -+ -+ if (c->ihead_lnum >= c->leb_cnt || c->ihead_lnum < c->main_first || -+ c->ihead_offs % c->min_io_size || c->ihead_offs < 0 || -+ c->ihead_offs > c->leb_size || c->ihead_offs & 7) { -+ err = 8; -+ goto out; -+ } -+ -+ main_sz = (long long)c->main_lebs * c->leb_size; -+ if (c->old_idx_sz & 7 || c->old_idx_sz >= main_sz) { -+ err = 9; -+ goto out; -+ } -+ -+ if (c->lpt_lnum < c->lpt_first || c->lpt_lnum > c->lpt_last || -+ c->lpt_offs < 0 || c->lpt_offs + c->nnode_sz > c->leb_size) { -+ err = 10; -+ goto out; -+ } -+ -+ if (c->nhead_lnum < c->lpt_first || c->nhead_lnum > c->lpt_last || -+ c->nhead_offs < 0 || c->nhead_offs % c->min_io_size || -+ c->nhead_offs > c->leb_size) { 
-+ err = 11; -+ goto out; -+ } -+ -+ if (c->ltab_lnum < c->lpt_first || c->ltab_lnum > c->lpt_last || -+ c->ltab_offs < 0 || -+ c->ltab_offs + c->ltab_sz > c->leb_size) { -+ err = 12; -+ goto out; -+ } -+ -+ if (c->big_lpt && (c->lsave_lnum < c->lpt_first || -+ c->lsave_lnum > c->lpt_last || c->lsave_offs < 0 || -+ c->lsave_offs + c->lsave_sz > c->leb_size)) { -+ err = 13; -+ goto out; -+ } -+ -+ if (c->lscan_lnum < c->main_first || c->lscan_lnum >= c->leb_cnt) { -+ err = 14; -+ goto out; -+ } -+ -+ if (c->lst.empty_lebs < 0 || c->lst.empty_lebs > c->main_lebs - 2) { -+ err = 15; -+ goto out; -+ } -+ -+ if (c->lst.idx_lebs < 0 || c->lst.idx_lebs > c->main_lebs - 1) { -+ err = 16; -+ goto out; -+ } -+ -+ if (c->lst.total_free < 0 || c->lst.total_free > main_sz || -+ c->lst.total_free & 7) { -+ err = 17; -+ goto out; -+ } -+ -+ if (c->lst.total_dirty < 0 || (c->lst.total_dirty & 7)) { -+ err = 18; -+ goto out; -+ } -+ -+ if (c->lst.total_used < 0 || (c->lst.total_used & 7)) { -+ err = 19; -+ goto out; -+ } -+ -+ if (c->lst.total_free + c->lst.total_dirty + -+ c->lst.total_used > main_sz) { -+ err = 20; -+ goto out; -+ } -+ -+ if (c->lst.total_dead + c->lst.total_dark + -+ c->lst.total_used + c->old_idx_sz > main_sz) { -+ err = 21; -+ goto out; -+ } -+ -+ if (c->lst.total_dead < 0 || -+ c->lst.total_dead > c->lst.total_free + c->lst.total_dirty || -+ c->lst.total_dead & 7) { -+ err = 22; -+ goto out; -+ } -+ -+ if (c->lst.total_dark < 0 || -+ c->lst.total_dark > c->lst.total_free + c->lst.total_dirty || -+ c->lst.total_dark & 7) { -+ err = 23; -+ goto out; -+ } -+ -+ return 0; -+ -+out: -+ ubifs_err("bad master node at offset %d error %d", c->mst_offs, err); -+ dbg_dump_node(c, c->mst_node); -+ return -EINVAL; -+} -+ -+/** -+ * ubifs_read_master - read master node. -+ * @c: UBIFS file-system description object -+ * -+ * This function finds and reads the master node during file-system mount. If -+ * the flash is empty, it creates default master node as well. 
Returns zero in -+ * case of success and a negative error code in case of failure. -+ */ -+int ubifs_read_master(struct ubifs_info *c) -+{ -+ int err, old_leb_cnt; -+ -+ c->mst_node = kzalloc(c->mst_node_alsz, GFP_KERNEL); -+ if (!c->mst_node) -+ return -ENOMEM; -+ -+ err = scan_for_master(c); -+ if (err) { -+ err = ubifs_recover_master_node(c); -+ if (err) -+ /* -+ * Note, we do not free 'c->mst_node' here because the -+ * unmount routine will take care of this. -+ */ -+ return err; -+ } -+ -+ /* Make sure that the recovery flag is clear */ -+ c->mst_node->flags &= cpu_to_le32(~UBIFS_MST_RCVRY); -+ -+ c->max_sqnum = le64_to_cpu(c->mst_node->ch.sqnum); -+ c->highest_inum = le64_to_cpu(c->mst_node->highest_inum); -+ c->cmt_no = le64_to_cpu(c->mst_node->cmt_no); -+ c->zroot.lnum = le32_to_cpu(c->mst_node->root_lnum); -+ c->zroot.offs = le32_to_cpu(c->mst_node->root_offs); -+ c->zroot.len = le32_to_cpu(c->mst_node->root_len); -+ c->lhead_lnum = le32_to_cpu(c->mst_node->log_lnum); -+ c->gc_lnum = le32_to_cpu(c->mst_node->gc_lnum); -+ c->ihead_lnum = le32_to_cpu(c->mst_node->ihead_lnum); -+ c->ihead_offs = le32_to_cpu(c->mst_node->ihead_offs); -+ c->old_idx_sz = le64_to_cpu(c->mst_node->index_size); -+ c->lpt_lnum = le32_to_cpu(c->mst_node->lpt_lnum); -+ c->lpt_offs = le32_to_cpu(c->mst_node->lpt_offs); -+ c->nhead_lnum = le32_to_cpu(c->mst_node->nhead_lnum); -+ c->nhead_offs = le32_to_cpu(c->mst_node->nhead_offs); -+ c->ltab_lnum = le32_to_cpu(c->mst_node->ltab_lnum); -+ c->ltab_offs = le32_to_cpu(c->mst_node->ltab_offs); -+ c->lsave_lnum = le32_to_cpu(c->mst_node->lsave_lnum); -+ c->lsave_offs = le32_to_cpu(c->mst_node->lsave_offs); -+ c->lscan_lnum = le32_to_cpu(c->mst_node->lscan_lnum); -+ c->lst.empty_lebs = le32_to_cpu(c->mst_node->empty_lebs); -+ c->lst.idx_lebs = le32_to_cpu(c->mst_node->idx_lebs); -+ old_leb_cnt = le32_to_cpu(c->mst_node->leb_cnt); -+ c->lst.total_free = le64_to_cpu(c->mst_node->total_free); -+ c->lst.total_dirty = 
le64_to_cpu(c->mst_node->total_dirty); -+ c->lst.total_used = le64_to_cpu(c->mst_node->total_used); -+ c->lst.total_dead = le64_to_cpu(c->mst_node->total_dead); -+ c->lst.total_dark = le64_to_cpu(c->mst_node->total_dark); -+ -+ c->calc_idx_sz = c->old_idx_sz; -+ -+ if (c->mst_node->flags & cpu_to_le32(UBIFS_MST_NO_ORPHS)) -+ c->no_orphs = 1; -+ -+ if (old_leb_cnt != c->leb_cnt) { -+ /* The file system has been resized */ -+ int growth = c->leb_cnt - old_leb_cnt; -+ -+ if (c->leb_cnt < old_leb_cnt || -+ c->leb_cnt < UBIFS_MIN_LEB_CNT) { -+ ubifs_err("bad leb_cnt on master node"); -+ dbg_dump_node(c, c->mst_node); -+ return -EINVAL; -+ } -+ -+ dbg_mnt("Auto resizing (master) from %d LEBs to %d LEBs", -+ old_leb_cnt, c->leb_cnt); -+ c->lst.empty_lebs += growth; -+ c->lst.total_free += growth * (long long)c->leb_size; -+ c->lst.total_dark += growth * (long long)c->dark_wm; -+ -+ /* -+ * Reflect changes back onto the master node. N.B. the master -+ * node gets written immediately whenever mounting (or -+ * remounting) in read-write mode, so we do not need to write it -+ * here. -+ */ -+ c->mst_node->leb_cnt = cpu_to_le32(c->leb_cnt); -+ c->mst_node->empty_lebs = cpu_to_le32(c->lst.empty_lebs); -+ c->mst_node->total_free = cpu_to_le64(c->lst.total_free); -+ c->mst_node->total_dark = cpu_to_le64(c->lst.total_dark); -+ } -+ -+ err = validate_master(c); -+ if (err) -+ return err; -+ -+ err = dbg_old_index_check_init(c, &c->zroot); -+ -+ return err; -+} -+ -+/** -+ * ubifs_write_master - write master node. -+ * @c: UBIFS file-system description object -+ * -+ * This function writes the master node. The caller has to take the -+ * @c->mst_mutex lock before calling this function. Returns zero in case of -+ * success and a negative error code in case of failure. The master node is -+ * written twice to enable recovery. 
-+ */ -+int ubifs_write_master(struct ubifs_info *c) -+{ -+ int err, lnum, offs, len; -+ -+ if (c->ro_media) -+ return -EROFS; -+ -+ lnum = UBIFS_MST_LNUM; -+ offs = c->mst_offs + c->mst_node_alsz; -+ len = UBIFS_MST_NODE_SZ; -+ -+ if (offs + UBIFS_MST_NODE_SZ > c->leb_size) { -+ err = ubifs_leb_unmap(c, lnum); -+ if (err) -+ return err; -+ offs = 0; -+ } -+ -+ c->mst_offs = offs; -+ c->mst_node->highest_inum = cpu_to_le64(c->highest_inum); -+ -+ err = ubifs_write_node(c, c->mst_node, len, lnum, offs, UBI_SHORTTERM); -+ if (err) -+ return err; -+ -+ lnum += 1; -+ -+ if (offs == 0) { -+ err = ubifs_leb_unmap(c, lnum); -+ if (err) -+ return err; -+ } -+ err = ubifs_write_node(c, c->mst_node, len, lnum, offs, UBI_SHORTTERM); -+ -+ return err; -+} -diff -Nurd linux-2.6.24.orig/fs/ubifs/misc.h linux-2.6.24/fs/ubifs/misc.h ---- linux-2.6.24.orig/fs/ubifs/misc.h 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.24/fs/ubifs/misc.h 2009-04-17 09:49:28.000000000 +0200 -@@ -0,0 +1,340 @@ -+/* -+ * This file is part of UBIFS. -+ * -+ * Copyright (C) 2006-2008 Nokia Corporation -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 as published by -+ * the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -+ * more details. -+ * -+ * You should have received a copy of the GNU General Public License along with -+ * this program; if not, write to the Free Software Foundation, Inc., 51 -+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ * -+ * Authors: Artem Bityutskiy (Битюцкий Артём) -+ * Adrian Hunter -+ */ -+ -+/* -+ * This file contains miscellaneous helper functions. 
-+ */ -+ -+#ifndef __UBIFS_MISC_H__ -+#define __UBIFS_MISC_H__ -+ -+/** -+ * ubifs_zn_dirty - check if znode is dirty. -+ * @znode: znode to check -+ * -+ * This helper function returns %1 if @znode is dirty and %0 otherwise. -+ */ -+static inline int ubifs_zn_dirty(const struct ubifs_znode *znode) -+{ -+ return !!test_bit(DIRTY_ZNODE, &znode->flags); -+} -+ -+/** -+ * ubifs_wake_up_bgt - wake up background thread. -+ * @c: UBIFS file-system description object -+ */ -+static inline void ubifs_wake_up_bgt(struct ubifs_info *c) -+{ -+ if (c->bgt && !c->need_bgt) { -+ c->need_bgt = 1; -+ wake_up_process(c->bgt); -+ } -+} -+ -+/** -+ * ubifs_tnc_find_child - find next child in znode. -+ * @znode: znode to search at -+ * @start: the zbranch index to start at -+ * -+ * This helper function looks for znode child starting at index @start. Returns -+ * the child or %NULL if no children were found. -+ */ -+static inline struct ubifs_znode * -+ubifs_tnc_find_child(struct ubifs_znode *znode, int start) -+{ -+ while (start < znode->child_cnt) { -+ if (znode->zbranch[start].znode) -+ return znode->zbranch[start].znode; -+ start += 1; -+ } -+ -+ return NULL; -+} -+ -+/** -+ * ubifs_inode - get UBIFS inode information by VFS 'struct inode' object. -+ * @inode: the VFS 'struct inode' pointer -+ */ -+static inline struct ubifs_inode *ubifs_inode(const struct inode *inode) -+{ -+ return container_of(inode, struct ubifs_inode, vfs_inode); -+} -+ -+/** -+ * ubifs_compr_present - check if compressor was compiled in. -+ * @compr_type: compressor type to check -+ * -+ * This function returns %1 of compressor of type @compr_type is present, and -+ * %0 if not. -+ */ -+static inline int ubifs_compr_present(int compr_type) -+{ -+ ubifs_assert(compr_type >= 0 && compr_type < UBIFS_COMPR_TYPES_CNT); -+ return !!ubifs_compressors[compr_type]->capi_name; -+} -+ -+/** -+ * ubifs_compr_name - get compressor name string by its type. 
-+ * @compr_type: compressor type -+ * -+ * This function returns compressor type string. -+ */ -+static inline const char *ubifs_compr_name(int compr_type) -+{ -+ ubifs_assert(compr_type >= 0 && compr_type < UBIFS_COMPR_TYPES_CNT); -+ return ubifs_compressors[compr_type]->name; -+} -+ -+/** -+ * ubifs_wbuf_sync - synchronize write-buffer. -+ * @wbuf: write-buffer to synchronize -+ * -+ * This is the same as as 'ubifs_wbuf_sync_nolock()' but it does not assume -+ * that the write-buffer is already locked. -+ */ -+static inline int ubifs_wbuf_sync(struct ubifs_wbuf *wbuf) -+{ -+ int err; -+ -+ mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); -+ err = ubifs_wbuf_sync_nolock(wbuf); -+ mutex_unlock(&wbuf->io_mutex); -+ return err; -+} -+ -+/** -+ * ubifs_leb_unmap - unmap an LEB. -+ * @c: UBIFS file-system description object -+ * @lnum: LEB number to unmap -+ * -+ * This function returns %0 on success and a negative error code on failure. -+ */ -+static inline int ubifs_leb_unmap(const struct ubifs_info *c, int lnum) -+{ -+ int err; -+ -+ if (c->ro_media) -+ return -EROFS; -+ err = ubi_leb_unmap(c->ubi, lnum); -+ if (err) { -+ ubifs_err("unmap LEB %d failed, error %d", lnum, err); -+ return err; -+ } -+ -+ return 0; -+} -+ -+/** -+ * ubifs_leb_write - write to a LEB. -+ * @c: UBIFS file-system description object -+ * @lnum: LEB number to write -+ * @buf: buffer to write from -+ * @offs: offset within LEB to write to -+ * @len: length to write -+ * @dtype: data type -+ * -+ * This function returns %0 on success and a negative error code on failure. 
-+ */ -+static inline int ubifs_leb_write(const struct ubifs_info *c, int lnum, -+ const void *buf, int offs, int len, int dtype) -+{ -+ int err; -+ -+ if (c->ro_media) -+ return -EROFS; -+ err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype); -+ if (err) { -+ ubifs_err("writing %d bytes at %d:%d, error %d", -+ len, lnum, offs, err); -+ return err; -+ } -+ -+ return 0; -+} -+ -+/** -+ * ubifs_leb_change - atomic LEB change. -+ * @c: UBIFS file-system description object -+ * @lnum: LEB number to write -+ * @buf: buffer to write from -+ * @len: length to write -+ * @dtype: data type -+ * -+ * This function returns %0 on success and a negative error code on failure. -+ */ -+static inline int ubifs_leb_change(const struct ubifs_info *c, int lnum, -+ const void *buf, int len, int dtype) -+{ -+ int err; -+ -+ if (c->ro_media) -+ return -EROFS; -+ err = ubi_leb_change(c->ubi, lnum, buf, len, dtype); -+ if (err) { -+ ubifs_err("changing %d bytes in LEB %d, error %d", -+ len, lnum, err); -+ return err; -+ } -+ -+ return 0; -+} -+ -+/** -+ * ubifs_encode_dev - encode device node IDs. -+ * @dev: UBIFS device node information -+ * @rdev: device IDs to encode -+ * -+ * This is a helper function which encodes major/minor numbers of a device node -+ * into UBIFS device node description. We use standard Linux "new" and "huge" -+ * encodings. -+ */ -+static inline int ubifs_encode_dev(union ubifs_dev_desc *dev, dev_t rdev) -+{ -+ if (new_valid_dev(rdev)) { -+ dev->new = cpu_to_le32(new_encode_dev(rdev)); -+ return sizeof(dev->new); -+ } else { -+ dev->huge = cpu_to_le64(huge_encode_dev(rdev)); -+ return sizeof(dev->huge); -+ } -+} -+ -+/** -+ * ubifs_add_dirt - add dirty space to LEB properties. -+ * @c: the UBIFS file-system description object -+ * @lnum: LEB to add dirty space for -+ * @dirty: dirty space to add -+ * -+ * This is a helper function which increased amount of dirty LEB space. Returns -+ * zero in case of success and a negative error code in case of failure. 
-+ */ -+static inline int ubifs_add_dirt(struct ubifs_info *c, int lnum, int dirty) -+{ -+ return ubifs_update_one_lp(c, lnum, LPROPS_NC, dirty, 0, 0); -+} -+ -+/** -+ * ubifs_return_leb - return LEB to lprops. -+ * @c: the UBIFS file-system description object -+ * @lnum: LEB to return -+ * -+ * This helper function cleans the "taken" flag of a logical eraseblock in the -+ * lprops. Returns zero in case of success and a negative error code in case of -+ * failure. -+ */ -+static inline int ubifs_return_leb(struct ubifs_info *c, int lnum) -+{ -+ return ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0, -+ LPROPS_TAKEN, 0); -+} -+ -+/** -+ * ubifs_idx_node_sz - return index node size. -+ * @c: the UBIFS file-system description object -+ * @child_cnt: number of children of this index node -+ */ -+static inline int ubifs_idx_node_sz(const struct ubifs_info *c, int child_cnt) -+{ -+ return UBIFS_IDX_NODE_SZ + (UBIFS_BRANCH_SZ + c->key_len) * child_cnt; -+} -+ -+/** -+ * ubifs_idx_branch - return pointer to an index branch. -+ * @c: the UBIFS file-system description object -+ * @idx: index node -+ * @bnum: branch number -+ */ -+static inline -+struct ubifs_branch *ubifs_idx_branch(const struct ubifs_info *c, -+ const struct ubifs_idx_node *idx, -+ int bnum) -+{ -+ return (struct ubifs_branch *)((void *)idx->branches + -+ (UBIFS_BRANCH_SZ + c->key_len) * bnum); -+} -+ -+/** -+ * ubifs_idx_key - return pointer to an index key. -+ * @c: the UBIFS file-system description object -+ * @idx: index node -+ */ -+static inline void *ubifs_idx_key(const struct ubifs_info *c, -+ const struct ubifs_idx_node *idx) -+{ -+ return (void *)((struct ubifs_branch *)idx->branches)->key; -+} -+ -+/** -+ * ubifs_current_time - round current time to time granularity. -+ * @inode: inode -+ */ -+static inline struct timespec ubifs_current_time(struct inode *inode) -+{ -+ return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ? 
-+ current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; -+} -+ -+/** -+ * ubifs_tnc_lookup - look up a file-system node. -+ * @c: UBIFS file-system description object -+ * @key: node key to lookup -+ * @node: the node is returned here -+ * -+ * This function look up and reads node with key @key. The caller has to make -+ * sure the @node buffer is large enough to fit the node. Returns zero in case -+ * of success, %-ENOENT if the node was not found, and a negative error code in -+ * case of failure. -+ */ -+static inline int ubifs_tnc_lookup(struct ubifs_info *c, -+ const union ubifs_key *key, void *node) -+{ -+ return ubifs_tnc_locate(c, key, node, NULL, NULL); -+} -+ -+/** -+ * ubifs_get_lprops - get reference to LEB properties. -+ * @c: the UBIFS file-system description object -+ * -+ * This function locks lprops. Lprops have to be unlocked by -+ * 'ubifs_release_lprops()'. -+ */ -+static inline void ubifs_get_lprops(struct ubifs_info *c) -+{ -+ mutex_lock(&c->lp_mutex); -+} -+ -+/** -+ * ubifs_release_lprops - release lprops lock. -+ * @c: the UBIFS file-system description object -+ * -+ * This function has to be called after each 'ubifs_get_lprops()' call to -+ * unlock lprops. -+ */ -+static inline void ubifs_release_lprops(struct ubifs_info *c) -+{ -+ ubifs_assert(mutex_is_locked(&c->lp_mutex)); -+ ubifs_assert(c->lst.empty_lebs >= 0 && -+ c->lst.empty_lebs <= c->main_lebs); -+ mutex_unlock(&c->lp_mutex); -+} -+ -+#endif /* __UBIFS_MISC_H__ */ -diff -Nurd linux-2.6.24.orig/fs/ubifs/orphan.c linux-2.6.24/fs/ubifs/orphan.c ---- linux-2.6.24.orig/fs/ubifs/orphan.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.24/fs/ubifs/orphan.c 2009-04-17 09:49:28.000000000 +0200 -@@ -0,0 +1,962 @@ -+/* -+ * This file is part of UBIFS. -+ * -+ * Copyright (C) 2006-2008 Nokia Corporation. 
-+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 as published by -+ * the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -+ * more details. -+ * -+ * You should have received a copy of the GNU General Public License along with -+ * this program; if not, write to the Free Software Foundation, Inc., 51 -+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ * -+ * Author: Adrian Hunter -+ */ -+ -+#include "ubifs.h" -+ -+/* -+ * An orphan is an inode number whose inode node has been committed to the index -+ * with a link count of zero. That happens when an open file is deleted -+ * (unlinked) and then a commit is run. In the normal course of events the inode -+ * would be deleted when the file is closed. However in the case of an unclean -+ * unmount, orphans need to be accounted for. After an unclean unmount, the -+ * orphans' inodes must be deleted which means either scanning the entire index -+ * looking for them, or keeping a list on flash somewhere. This unit implements -+ * the latter approach. -+ * -+ * The orphan area is a fixed number of LEBs situated between the LPT area and -+ * the main area. The number of orphan area LEBs is specified when the file -+ * system is created. The minimum number is 1. The size of the orphan area -+ * should be so that it can hold the maximum number of orphans that are expected -+ * to ever exist at one time. -+ * -+ * The number of orphans that can fit in a LEB is: -+ * -+ * (c->leb_size - UBIFS_ORPH_NODE_SZ) / sizeof(__le64) -+ * -+ * For example: a 15872 byte LEB can fit 1980 orphans so 1 LEB may be enough. -+ * -+ * Orphans are accumulated in a rb-tree. 
When an inode's link count drops to -+ * zero, the inode number is added to the rb-tree. It is removed from the tree -+ * when the inode is deleted. Any new orphans that are in the orphan tree when -+ * the commit is run, are written to the orphan area in 1 or more orphan nodes. -+ * If the orphan area is full, it is consolidated to make space. There is -+ * always enough space because validation prevents the user from creating more -+ * than the maximum number of orphans allowed. -+ */ -+ -+#ifdef CONFIG_UBIFS_FS_DEBUG -+static int dbg_check_orphans(struct ubifs_info *c); -+#else -+#define dbg_check_orphans(c) 0 -+#endif -+ -+/** -+ * ubifs_add_orphan - add an orphan. -+ * @c: UBIFS file-system description object -+ * @inum: orphan inode number -+ * -+ * Add an orphan. This function is called when an inodes link count drops to -+ * zero. -+ */ -+int ubifs_add_orphan(struct ubifs_info *c, ino_t inum) -+{ -+ struct ubifs_orphan *orphan, *o; -+ struct rb_node **p, *parent = NULL; -+ -+ orphan = kzalloc(sizeof(struct ubifs_orphan), GFP_NOFS); -+ if (!orphan) -+ return -ENOMEM; -+ orphan->inum = inum; -+ orphan->new = 1; -+ -+ spin_lock(&c->orphan_lock); -+ if (c->tot_orphans >= c->max_orphans) { -+ spin_unlock(&c->orphan_lock); -+ kfree(orphan); -+ return -ENFILE; -+ } -+ p = &c->orph_tree.rb_node; -+ while (*p) { -+ parent = *p; -+ o = rb_entry(parent, struct ubifs_orphan, rb); -+ if (inum < o->inum) -+ p = &(*p)->rb_left; -+ else if (inum > o->inum) -+ p = &(*p)->rb_right; -+ else { -+ dbg_err("orphaned twice"); -+ spin_unlock(&c->orphan_lock); -+ kfree(orphan); -+ return 0; -+ } -+ } -+ c->tot_orphans += 1; -+ c->new_orphans += 1; -+ rb_link_node(&orphan->rb, parent, p); -+ rb_insert_color(&orphan->rb, &c->orph_tree); -+ list_add_tail(&orphan->list, &c->orph_list); -+ list_add_tail(&orphan->new_list, &c->orph_new); -+ spin_unlock(&c->orphan_lock); -+ dbg_gen("ino %lu", (unsigned long)inum); -+ return 0; -+} -+ -+/** -+ * ubifs_delete_orphan - delete an orphan. 
-+ * @c: UBIFS file-system description object -+ * @inum: orphan inode number -+ * -+ * Delete an orphan. This function is called when an inode is deleted. -+ */ -+void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum) -+{ -+ struct ubifs_orphan *o; -+ struct rb_node *p; -+ -+ spin_lock(&c->orphan_lock); -+ p = c->orph_tree.rb_node; -+ while (p) { -+ o = rb_entry(p, struct ubifs_orphan, rb); -+ if (inum < o->inum) -+ p = p->rb_left; -+ else if (inum > o->inum) -+ p = p->rb_right; -+ else { -+ if (o->dnext) { -+ spin_unlock(&c->orphan_lock); -+ dbg_gen("deleted twice ino %lu", -+ (unsigned long)inum); -+ return; -+ } -+ if (o->cnext) { -+ o->dnext = c->orph_dnext; -+ c->orph_dnext = o; -+ spin_unlock(&c->orphan_lock); -+ dbg_gen("delete later ino %lu", -+ (unsigned long)inum); -+ return; -+ } -+ rb_erase(p, &c->orph_tree); -+ list_del(&o->list); -+ c->tot_orphans -= 1; -+ if (o->new) { -+ list_del(&o->new_list); -+ c->new_orphans -= 1; -+ } -+ spin_unlock(&c->orphan_lock); -+ kfree(o); -+ dbg_gen("inum %lu", (unsigned long)inum); -+ return; -+ } -+ } -+ spin_unlock(&c->orphan_lock); -+ dbg_err("missing orphan ino %lu", (unsigned long)inum); -+ dbg_dump_stack(); -+} -+ -+/** -+ * ubifs_orphan_start_commit - start commit of orphans. -+ * @c: UBIFS file-system description object -+ * -+ * Start commit of orphans. 
-+ */ -+int ubifs_orphan_start_commit(struct ubifs_info *c) -+{ -+ struct ubifs_orphan *orphan, **last; -+ -+ spin_lock(&c->orphan_lock); -+ last = &c->orph_cnext; -+ list_for_each_entry(orphan, &c->orph_new, new_list) { -+ ubifs_assert(orphan->new); -+ orphan->new = 0; -+ *last = orphan; -+ last = &orphan->cnext; -+ } -+ *last = orphan->cnext; -+ c->cmt_orphans = c->new_orphans; -+ c->new_orphans = 0; -+ dbg_cmt("%d orphans to commit", c->cmt_orphans); -+ INIT_LIST_HEAD(&c->orph_new); -+ if (c->tot_orphans == 0) -+ c->no_orphs = 1; -+ else -+ c->no_orphs = 0; -+ spin_unlock(&c->orphan_lock); -+ return 0; -+} -+ -+/** -+ * avail_orphs - calculate available space. -+ * @c: UBIFS file-system description object -+ * -+ * This function returns the number of orphans that can be written in the -+ * available space. -+ */ -+static int avail_orphs(struct ubifs_info *c) -+{ -+ int avail_lebs, avail, gap; -+ -+ avail_lebs = c->orph_lebs - (c->ohead_lnum - c->orph_first) - 1; -+ avail = avail_lebs * -+ ((c->leb_size - UBIFS_ORPH_NODE_SZ) / sizeof(__le64)); -+ gap = c->leb_size - c->ohead_offs; -+ if (gap >= UBIFS_ORPH_NODE_SZ + sizeof(__le64)) -+ avail += (gap - UBIFS_ORPH_NODE_SZ) / sizeof(__le64); -+ return avail; -+} -+ -+/** -+ * tot_avail_orphs - calculate total space. -+ * @c: UBIFS file-system description object -+ * -+ * This function returns the number of orphans that can be written in half -+ * the total space. That leaves half the space for adding new orphans. -+ */ -+static int tot_avail_orphs(struct ubifs_info *c) -+{ -+ int avail_lebs, avail; -+ -+ avail_lebs = c->orph_lebs; -+ avail = avail_lebs * -+ ((c->leb_size - UBIFS_ORPH_NODE_SZ) / sizeof(__le64)); -+ return avail / 2; -+} -+ -+/** -+ * do_write_orph_node - write a node to the orphan head. -+ * @c: UBIFS file-system description object -+ * @len: length of node -+ * @atomic: write atomically -+ * -+ * This function writes a node to the orphan head from the orphan buffer. 
If -+ * %atomic is not zero, then the write is done atomically. On success, %0 is -+ * returned, otherwise a negative error code is returned. -+ */ -+static int do_write_orph_node(struct ubifs_info *c, int len, int atomic) -+{ -+ int err = 0; -+ -+ if (atomic) { -+ ubifs_assert(c->ohead_offs == 0); -+ ubifs_prepare_node(c, c->orph_buf, len, 1); -+ len = ALIGN(len, c->min_io_size); -+ err = ubifs_leb_change(c, c->ohead_lnum, c->orph_buf, len, -+ UBI_SHORTTERM); -+ } else { -+ if (c->ohead_offs == 0) { -+ /* Ensure LEB has been unmapped */ -+ err = ubifs_leb_unmap(c, c->ohead_lnum); -+ if (err) -+ return err; -+ } -+ err = ubifs_write_node(c, c->orph_buf, len, c->ohead_lnum, -+ c->ohead_offs, UBI_SHORTTERM); -+ } -+ return err; -+} -+ -+/** -+ * write_orph_node - write an orphan node. -+ * @c: UBIFS file-system description object -+ * @atomic: write atomically -+ * -+ * This function builds an orphan node from the cnext list and writes it to the -+ * orphan head. On success, %0 is returned, otherwise a negative error code -+ * is returned. -+ */ -+static int write_orph_node(struct ubifs_info *c, int atomic) -+{ -+ struct ubifs_orphan *orphan, *cnext; -+ struct ubifs_orph_node *orph; -+ int gap, err, len, cnt, i; -+ -+ ubifs_assert(c->cmt_orphans > 0); -+ gap = c->leb_size - c->ohead_offs; -+ if (gap < UBIFS_ORPH_NODE_SZ + sizeof(__le64)) { -+ c->ohead_lnum += 1; -+ c->ohead_offs = 0; -+ gap = c->leb_size; -+ if (c->ohead_lnum > c->orph_last) { -+ /* -+ * We limit the number of orphans so that this should -+ * never happen. 
-+ */ -+ ubifs_err("out of space in orphan area"); -+ return -EINVAL; -+ } -+ } -+ cnt = (gap - UBIFS_ORPH_NODE_SZ) / sizeof(__le64); -+ if (cnt > c->cmt_orphans) -+ cnt = c->cmt_orphans; -+ len = UBIFS_ORPH_NODE_SZ + cnt * sizeof(__le64); -+ ubifs_assert(c->orph_buf); -+ orph = c->orph_buf; -+ orph->ch.node_type = UBIFS_ORPH_NODE; -+ spin_lock(&c->orphan_lock); -+ cnext = c->orph_cnext; -+ for (i = 0; i < cnt; i++) { -+ orphan = cnext; -+ orph->inos[i] = cpu_to_le64(orphan->inum); -+ cnext = orphan->cnext; -+ orphan->cnext = NULL; -+ } -+ c->orph_cnext = cnext; -+ c->cmt_orphans -= cnt; -+ spin_unlock(&c->orphan_lock); -+ if (c->cmt_orphans) -+ orph->cmt_no = cpu_to_le64(c->cmt_no); -+ else -+ /* Mark the last node of the commit */ -+ orph->cmt_no = cpu_to_le64((c->cmt_no) | (1ULL << 63)); -+ ubifs_assert(c->ohead_offs + len <= c->leb_size); -+ ubifs_assert(c->ohead_lnum >= c->orph_first); -+ ubifs_assert(c->ohead_lnum <= c->orph_last); -+ err = do_write_orph_node(c, len, atomic); -+ c->ohead_offs += ALIGN(len, c->min_io_size); -+ c->ohead_offs = ALIGN(c->ohead_offs, 8); -+ return err; -+} -+ -+/** -+ * write_orph_nodes - write orphan nodes until there are no more to commit. -+ * @c: UBIFS file-system description object -+ * @atomic: write atomically -+ * -+ * This function writes orphan nodes for all the orphans to commit. On success, -+ * %0 is returned, otherwise a negative error code is returned. -+ */ -+static int write_orph_nodes(struct ubifs_info *c, int atomic) -+{ -+ int err; -+ -+ while (c->cmt_orphans > 0) { -+ err = write_orph_node(c, atomic); -+ if (err) -+ return err; -+ } -+ if (atomic) { -+ int lnum; -+ -+ /* Unmap any unused LEBs after consolidation */ -+ lnum = c->ohead_lnum + 1; -+ for (lnum = c->ohead_lnum + 1; lnum <= c->orph_last; lnum++) { -+ err = ubifs_leb_unmap(c, lnum); -+ if (err) -+ return err; -+ } -+ } -+ return 0; -+} -+ -+/** -+ * consolidate - consolidate the orphan area. 
-+ * @c: UBIFS file-system description object -+ * -+ * This function enables consolidation by putting all the orphans into the list -+ * to commit. The list is in the order that the orphans were added, and the -+ * LEBs are written atomically in order, so at no time can orphans be lost by -+ * an unclean unmount. -+ * -+ * This function returns %0 on success and a negative error code on failure. -+ */ -+static int consolidate(struct ubifs_info *c) -+{ -+ int tot_avail = tot_avail_orphs(c), err = 0; -+ -+ spin_lock(&c->orphan_lock); -+ dbg_cmt("there is space for %d orphans and there are %d", -+ tot_avail, c->tot_orphans); -+ if (c->tot_orphans - c->new_orphans <= tot_avail) { -+ struct ubifs_orphan *orphan, **last; -+ int cnt = 0; -+ -+ /* Change the cnext list to include all non-new orphans */ -+ last = &c->orph_cnext; -+ list_for_each_entry(orphan, &c->orph_list, list) { -+ if (orphan->new) -+ continue; -+ *last = orphan; -+ last = &orphan->cnext; -+ cnt += 1; -+ } -+ *last = orphan->cnext; -+ ubifs_assert(cnt == c->tot_orphans - c->new_orphans); -+ c->cmt_orphans = cnt; -+ c->ohead_lnum = c->orph_first; -+ c->ohead_offs = 0; -+ } else { -+ /* -+ * We limit the number of orphans so that this should -+ * never happen. -+ */ -+ ubifs_err("out of space in orphan area"); -+ err = -EINVAL; -+ } -+ spin_unlock(&c->orphan_lock); -+ return err; -+} -+ -+/** -+ * commit_orphans - commit orphans. -+ * @c: UBIFS file-system description object -+ * -+ * This function commits orphans to flash. On success, %0 is returned, -+ * otherwise a negative error code is returned. 
-+ */ -+static int commit_orphans(struct ubifs_info *c) -+{ -+ int avail, atomic = 0, err; -+ -+ ubifs_assert(c->cmt_orphans > 0); -+ avail = avail_orphs(c); -+ if (avail < c->cmt_orphans) { -+ /* Not enough space to write new orphans, so consolidate */ -+ err = consolidate(c); -+ if (err) -+ return err; -+ atomic = 1; -+ } -+ err = write_orph_nodes(c, atomic); -+ return err; -+} -+ -+/** -+ * erase_deleted - erase the orphans marked for deletion. -+ * @c: UBIFS file-system description object -+ * -+ * During commit, the orphans being committed cannot be deleted, so they are -+ * marked for deletion and deleted by this function. Also, the recovery -+ * adds killed orphans to the deletion list, and therefore they are deleted -+ * here too. -+ */ -+static void erase_deleted(struct ubifs_info *c) -+{ -+ struct ubifs_orphan *orphan, *dnext; -+ -+ spin_lock(&c->orphan_lock); -+ dnext = c->orph_dnext; -+ while (dnext) { -+ orphan = dnext; -+ dnext = orphan->dnext; -+ ubifs_assert(!orphan->new); -+ rb_erase(&orphan->rb, &c->orph_tree); -+ list_del(&orphan->list); -+ c->tot_orphans -= 1; -+ dbg_gen("deleting orphan ino %lu", (unsigned long)orphan->inum); -+ kfree(orphan); -+ } -+ c->orph_dnext = NULL; -+ spin_unlock(&c->orphan_lock); -+} -+ -+/** -+ * ubifs_orphan_end_commit - end commit of orphans. -+ * @c: UBIFS file-system description object -+ * -+ * End commit of orphans. -+ */ -+int ubifs_orphan_end_commit(struct ubifs_info *c) -+{ -+ int err; -+ -+ if (c->cmt_orphans != 0) { -+ err = commit_orphans(c); -+ if (err) -+ return err; -+ } -+ erase_deleted(c); -+ err = dbg_check_orphans(c); -+ return err; -+} -+ -+/** -+ * ubifs_clear_orphans - erase all LEBs used for orphans. -+ * @c: UBIFS file-system description object -+ * -+ * If recovery is not required, then the orphans from the previous session -+ * are not needed. This function locates the LEBs used to record -+ * orphans, and un-maps them. 
-+ */ -+int ubifs_clear_orphans(struct ubifs_info *c) -+{ -+ int lnum, err; -+ -+ for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) { -+ err = ubifs_leb_unmap(c, lnum); -+ if (err) -+ return err; -+ } -+ c->ohead_lnum = c->orph_first; -+ c->ohead_offs = 0; -+ return 0; -+} -+ -+/** -+ * insert_dead_orphan - insert an orphan. -+ * @c: UBIFS file-system description object -+ * @inum: orphan inode number -+ * -+ * This function is a helper to the 'do_kill_orphans()' function. The orphan -+ * must be kept until the next commit, so it is added to the rb-tree and the -+ * deletion list. -+ */ -+static int insert_dead_orphan(struct ubifs_info *c, ino_t inum) -+{ -+ struct ubifs_orphan *orphan, *o; -+ struct rb_node **p, *parent = NULL; -+ -+ orphan = kzalloc(sizeof(struct ubifs_orphan), GFP_KERNEL); -+ if (!orphan) -+ return -ENOMEM; -+ orphan->inum = inum; -+ -+ p = &c->orph_tree.rb_node; -+ while (*p) { -+ parent = *p; -+ o = rb_entry(parent, struct ubifs_orphan, rb); -+ if (inum < o->inum) -+ p = &(*p)->rb_left; -+ else if (inum > o->inum) -+ p = &(*p)->rb_right; -+ else { -+ /* Already added - no problem */ -+ kfree(orphan); -+ return 0; -+ } -+ } -+ c->tot_orphans += 1; -+ rb_link_node(&orphan->rb, parent, p); -+ rb_insert_color(&orphan->rb, &c->orph_tree); -+ list_add_tail(&orphan->list, &c->orph_list); -+ orphan->dnext = c->orph_dnext; -+ c->orph_dnext = orphan; -+ dbg_mnt("ino %lu, new %d, tot %d", (unsigned long)inum, -+ c->new_orphans, c->tot_orphans); -+ return 0; -+} -+ -+/** -+ * do_kill_orphans - remove orphan inodes from the index. -+ * @c: UBIFS file-system description object -+ * @sleb: scanned LEB -+ * @last_cmt_no: cmt_no of last orphan node read is passed and returned here -+ * @outofdate: whether the LEB is out of date is returned here -+ * @last_flagged: whether the end orphan node is encountered -+ * -+ * This function is a helper to the 'kill_orphans()' function. 
It goes through -+ * every orphan node in a LEB and for every inode number recorded, removes -+ * all keys for that inode from the TNC. -+ */ -+static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb, -+ unsigned long long *last_cmt_no, int *outofdate, -+ int *last_flagged) -+{ -+ struct ubifs_scan_node *snod; -+ struct ubifs_orph_node *orph; -+ unsigned long long cmt_no; -+ ino_t inum; -+ int i, n, err, first = 1; -+ -+ list_for_each_entry(snod, &sleb->nodes, list) { -+ if (snod->type != UBIFS_ORPH_NODE) { -+ ubifs_err("invalid node type %d in orphan area at " -+ "%d:%d", snod->type, sleb->lnum, snod->offs); -+ dbg_dump_node(c, snod->node); -+ return -EINVAL; -+ } -+ -+ orph = snod->node; -+ -+ /* Check commit number */ -+ cmt_no = le64_to_cpu(orph->cmt_no) & LLONG_MAX; -+ /* -+ * The commit number on the master node may be less, because -+ * of a failed commit. If there are several failed commits in a -+ * row, the commit number written on orphan nodes will continue -+ * to increase (because the commit number is adjusted here) even -+ * though the commit number on the master node stays the same -+ * because the master node has not been re-written. -+ */ -+ if (cmt_no > c->cmt_no) -+ c->cmt_no = cmt_no; -+ if (cmt_no < *last_cmt_no && *last_flagged) { -+ /* -+ * The last orphan node had a higher commit number and -+ * was flagged as the last written for that commit -+ * number. That makes this orphan node, out of date. 
-+ */ -+ if (!first) { -+ ubifs_err("out of order commit number %llu in " -+ "orphan node at %d:%d", -+ cmt_no, sleb->lnum, snod->offs); -+ dbg_dump_node(c, snod->node); -+ return -EINVAL; -+ } -+ dbg_rcvry("out of date LEB %d", sleb->lnum); -+ *outofdate = 1; -+ return 0; -+ } -+ -+ if (first) -+ first = 0; -+ -+ n = (le32_to_cpu(orph->ch.len) - UBIFS_ORPH_NODE_SZ) >> 3; -+ for (i = 0; i < n; i++) { -+ inum = le64_to_cpu(orph->inos[i]); -+ dbg_rcvry("deleting orphaned inode %lu", -+ (unsigned long)inum); -+ err = ubifs_tnc_remove_ino(c, inum); -+ if (err) -+ return err; -+ err = insert_dead_orphan(c, inum); -+ if (err) -+ return err; -+ } -+ -+ *last_cmt_no = cmt_no; -+ if (le64_to_cpu(orph->cmt_no) & (1ULL << 63)) { -+ dbg_rcvry("last orph node for commit %llu at %d:%d", -+ cmt_no, sleb->lnum, snod->offs); -+ *last_flagged = 1; -+ } else -+ *last_flagged = 0; -+ } -+ -+ return 0; -+} -+ -+/** -+ * kill_orphans - remove all orphan inodes from the index. -+ * @c: UBIFS file-system description object -+ * -+ * If recovery is required, then orphan inodes recorded during the previous -+ * session (which ended with an unclean unmount) must be deleted from the index. -+ * This is done by updating the TNC, but since the index is not updated until -+ * the next commit, the LEBs where the orphan information is recorded are not -+ * erased until the next commit. -+ */ -+static int kill_orphans(struct ubifs_info *c) -+{ -+ unsigned long long last_cmt_no = 0; -+ int lnum, err = 0, outofdate = 0, last_flagged = 0; -+ -+ c->ohead_lnum = c->orph_first; -+ c->ohead_offs = 0; -+ /* Check no-orphans flag and skip this if no orphans */ -+ if (c->no_orphs) { -+ dbg_rcvry("no orphans"); -+ return 0; -+ } -+ /* -+ * Orph nodes always start at c->orph_first and are written to each -+ * successive LEB in turn. Generally unused LEBs will have been unmapped -+ * but may contain out of date orphan nodes if the unmap didn't go -+ * through. 
In addition, the last orphan node written for each commit is -+ * marked (top bit of orph->cmt_no is set to 1). It is possible that -+ * there are orphan nodes from the next commit (i.e. the commit did not -+ * complete successfully). In that case, no orphans will have been lost -+ * due to the way that orphans are written, and any orphans added will -+ * be valid orphans anyway and so can be deleted. -+ */ -+ for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) { -+ struct ubifs_scan_leb *sleb; -+ -+ dbg_rcvry("LEB %d", lnum); -+ sleb = ubifs_scan(c, lnum, 0, c->sbuf); -+ if (IS_ERR(sleb)) { -+ sleb = ubifs_recover_leb(c, lnum, 0, c->sbuf, 0); -+ if (IS_ERR(sleb)) { -+ err = PTR_ERR(sleb); -+ break; -+ } -+ } -+ err = do_kill_orphans(c, sleb, &last_cmt_no, &outofdate, -+ &last_flagged); -+ if (err || outofdate) { -+ ubifs_scan_destroy(sleb); -+ break; -+ } -+ if (sleb->endpt) { -+ c->ohead_lnum = lnum; -+ c->ohead_offs = sleb->endpt; -+ } -+ ubifs_scan_destroy(sleb); -+ } -+ return err; -+} -+ -+/** -+ * ubifs_mount_orphans - delete orphan inodes and erase LEBs that recorded them. -+ * @c: UBIFS file-system description object -+ * @unclean: indicates recovery from unclean unmount -+ * @read_only: indicates read only mount -+ * -+ * This function is called when mounting to erase orphans from the previous -+ * session. If UBIFS was not unmounted cleanly, then the inodes recorded as -+ * orphans are deleted. 
-+ */ -+int ubifs_mount_orphans(struct ubifs_info *c, int unclean, int read_only) -+{ -+ int err = 0; -+ -+ c->max_orphans = tot_avail_orphs(c); -+ -+ if (!read_only) { -+ c->orph_buf = vmalloc(c->leb_size); -+ if (!c->orph_buf) -+ return -ENOMEM; -+ } -+ -+ if (unclean) -+ err = kill_orphans(c); -+ else if (!read_only) -+ err = ubifs_clear_orphans(c); -+ -+ return err; -+} -+ -+#ifdef CONFIG_UBIFS_FS_DEBUG -+ -+struct check_orphan { -+ struct rb_node rb; -+ ino_t inum; -+}; -+ -+struct check_info { -+ unsigned long last_ino; -+ unsigned long tot_inos; -+ unsigned long missing; -+ unsigned long long leaf_cnt; -+ struct ubifs_ino_node *node; -+ struct rb_root root; -+}; -+ -+static int dbg_find_orphan(struct ubifs_info *c, ino_t inum) -+{ -+ struct ubifs_orphan *o; -+ struct rb_node *p; -+ -+ spin_lock(&c->orphan_lock); -+ p = c->orph_tree.rb_node; -+ while (p) { -+ o = rb_entry(p, struct ubifs_orphan, rb); -+ if (inum < o->inum) -+ p = p->rb_left; -+ else if (inum > o->inum) -+ p = p->rb_right; -+ else { -+ spin_unlock(&c->orphan_lock); -+ return 1; -+ } -+ } -+ spin_unlock(&c->orphan_lock); -+ return 0; -+} -+ -+static int dbg_ins_check_orphan(struct rb_root *root, ino_t inum) -+{ -+ struct check_orphan *orphan, *o; -+ struct rb_node **p, *parent = NULL; -+ -+ orphan = kzalloc(sizeof(struct check_orphan), GFP_NOFS); -+ if (!orphan) -+ return -ENOMEM; -+ orphan->inum = inum; -+ -+ p = &root->rb_node; -+ while (*p) { -+ parent = *p; -+ o = rb_entry(parent, struct check_orphan, rb); -+ if (inum < o->inum) -+ p = &(*p)->rb_left; -+ else if (inum > o->inum) -+ p = &(*p)->rb_right; -+ else { -+ kfree(orphan); -+ return 0; -+ } -+ } -+ rb_link_node(&orphan->rb, parent, p); -+ rb_insert_color(&orphan->rb, root); -+ return 0; -+} -+ -+static int dbg_find_check_orphan(struct rb_root *root, ino_t inum) -+{ -+ struct check_orphan *o; -+ struct rb_node *p; -+ -+ p = root->rb_node; -+ while (p) { -+ o = rb_entry(p, struct check_orphan, rb); -+ if (inum < o->inum) -+ p = 
p->rb_left; -+ else if (inum > o->inum) -+ p = p->rb_right; -+ else -+ return 1; -+ } -+ return 0; -+} -+ -+static void dbg_free_check_tree(struct rb_root *root) -+{ -+ struct rb_node *this = root->rb_node; -+ struct check_orphan *o; -+ -+ while (this) { -+ if (this->rb_left) { -+ this = this->rb_left; -+ continue; -+ } else if (this->rb_right) { -+ this = this->rb_right; -+ continue; -+ } -+ o = rb_entry(this, struct check_orphan, rb); -+ this = rb_parent(this); -+ if (this) { -+ if (this->rb_left == &o->rb) -+ this->rb_left = NULL; -+ else -+ this->rb_right = NULL; -+ } -+ kfree(o); -+ } -+} -+ -+static int dbg_orphan_check(struct ubifs_info *c, struct ubifs_zbranch *zbr, -+ void *priv) -+{ -+ struct check_info *ci = priv; -+ ino_t inum; -+ int err; -+ -+ inum = key_inum(c, &zbr->key); -+ if (inum != ci->last_ino) { -+ /* Lowest node type is the inode node, so it comes first */ -+ if (key_type(c, &zbr->key) != UBIFS_INO_KEY) -+ ubifs_err("found orphan node ino %lu, type %d", -+ (unsigned long)inum, key_type(c, &zbr->key)); -+ ci->last_ino = inum; -+ ci->tot_inos += 1; -+ err = ubifs_tnc_read_node(c, zbr, ci->node); -+ if (err) { -+ ubifs_err("node read failed, error %d", err); -+ return err; -+ } -+ if (ci->node->nlink == 0) -+ /* Must be recorded as an orphan */ -+ if (!dbg_find_check_orphan(&ci->root, inum) && -+ !dbg_find_orphan(c, inum)) { -+ ubifs_err("missing orphan, ino %lu", -+ (unsigned long)inum); -+ ci->missing += 1; -+ } -+ } -+ ci->leaf_cnt += 1; -+ return 0; -+} -+ -+static int dbg_read_orphans(struct check_info *ci, struct ubifs_scan_leb *sleb) -+{ -+ struct ubifs_scan_node *snod; -+ struct ubifs_orph_node *orph; -+ ino_t inum; -+ int i, n, err; -+ -+ list_for_each_entry(snod, &sleb->nodes, list) { -+ cond_resched(); -+ if (snod->type != UBIFS_ORPH_NODE) -+ continue; -+ orph = snod->node; -+ n = (le32_to_cpu(orph->ch.len) - UBIFS_ORPH_NODE_SZ) >> 3; -+ for (i = 0; i < n; i++) { -+ inum = le64_to_cpu(orph->inos[i]); -+ err = 
dbg_ins_check_orphan(&ci->root, inum); -+ if (err) -+ return err; -+ } -+ } -+ return 0; -+} -+ -+static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci) -+{ -+ int lnum, err = 0; -+ -+ /* Check no-orphans flag and skip this if no orphans */ -+ if (c->no_orphs) -+ return 0; -+ -+ for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) { -+ struct ubifs_scan_leb *sleb; -+ -+ sleb = ubifs_scan(c, lnum, 0, c->dbg->buf); -+ if (IS_ERR(sleb)) { -+ err = PTR_ERR(sleb); -+ break; -+ } -+ -+ err = dbg_read_orphans(ci, sleb); -+ ubifs_scan_destroy(sleb); -+ if (err) -+ break; -+ } -+ -+ return err; -+} -+ -+static int dbg_check_orphans(struct ubifs_info *c) -+{ -+ struct check_info ci; -+ int err; -+ -+ if (!(ubifs_chk_flags & UBIFS_CHK_ORPH)) -+ return 0; -+ -+ ci.last_ino = 0; -+ ci.tot_inos = 0; -+ ci.missing = 0; -+ ci.leaf_cnt = 0; -+ ci.root = RB_ROOT; -+ ci.node = kmalloc(UBIFS_MAX_INO_NODE_SZ, GFP_NOFS); -+ if (!ci.node) { -+ ubifs_err("out of memory"); -+ return -ENOMEM; -+ } -+ -+ err = dbg_scan_orphans(c, &ci); -+ if (err) -+ goto out; -+ -+ err = dbg_walk_index(c, &dbg_orphan_check, NULL, &ci); -+ if (err) { -+ ubifs_err("cannot scan TNC, error %d", err); -+ goto out; -+ } -+ -+ if (ci.missing) { -+ ubifs_err("%lu missing orphan(s)", ci.missing); -+ err = -EINVAL; -+ goto out; -+ } -+ -+ dbg_cmt("last inode number is %lu", ci.last_ino); -+ dbg_cmt("total number of inodes is %lu", ci.tot_inos); -+ dbg_cmt("total number of leaf nodes is %llu", ci.leaf_cnt); -+ -+out: -+ dbg_free_check_tree(&ci.root); -+ kfree(ci.node); -+ return err; -+} -+ -+#endif /* CONFIG_UBIFS_FS_DEBUG */ -diff -Nurd linux-2.6.24.orig/fs/ubifs/recovery.c linux-2.6.24/fs/ubifs/recovery.c ---- linux-2.6.24.orig/fs/ubifs/recovery.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.24/fs/ubifs/recovery.c 2009-04-17 09:49:28.000000000 +0200 -@@ -0,0 +1,1520 @@ -+/* -+ * This file is part of UBIFS. 
-+ * -+ * Copyright (C) 2006-2008 Nokia Corporation -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 as published by -+ * the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -+ * more details. -+ * -+ * You should have received a copy of the GNU General Public License along with -+ * this program; if not, write to the Free Software Foundation, Inc., 51 -+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ * -+ * Authors: Adrian Hunter -+ * Artem Bityutskiy (Битюцкий Артём) -+ */ -+ -+/* -+ * This file implements functions needed to recover from unclean un-mounts. -+ * When UBIFS is mounted, it checks a flag on the master node to determine if -+ * an un-mount was completed sucessfully. If not, the process of mounting -+ * incorparates additional checking and fixing of on-flash data structures. -+ * UBIFS always cleans away all remnants of an unclean un-mount, so that -+ * errors do not accumulate. However UBIFS defers recovery if it is mounted -+ * read-only, and the flash is not modified in that case. -+ */ -+ -+#include <linux/crc32.h> -+#include "ubifs.h" -+ -+/** -+ * is_empty - determine whether a buffer is empty (contains all 0xff). -+ * @buf: buffer to clean -+ * @len: length of buffer -+ * -+ * This function returns %1 if the buffer is empty (contains all 0xff) otherwise -+ * %0 is returned. -+ */ -+static int is_empty(void *buf, int len) -+{ -+ uint8_t *p = buf; -+ int i; -+ -+ for (i = 0; i < len; i++) -+ if (*p++ != 0xff) -+ return 0; -+ return 1; -+} -+ -+/** -+ * get_master_node - get the last valid master node allowing for corruption. 
-+ * @c: UBIFS file-system description object -+ * @lnum: LEB number -+ * @pbuf: buffer containing the LEB read, is returned here -+ * @mst: master node, if found, is returned here -+ * @cor: corruption, if found, is returned here -+ * -+ * This function allocates a buffer, reads the LEB into it, and finds and -+ * returns the last valid master node allowing for one area of corruption. -+ * The corrupt area, if there is one, must be consistent with the assumption -+ * that it is the result of an unclean unmount while the master node was being -+ * written. Under those circumstances, it is valid to use the previously written -+ * master node. -+ * -+ * This function returns %0 on success and a negative error code on failure. -+ */ -+static int get_master_node(const struct ubifs_info *c, int lnum, void **pbuf, -+ struct ubifs_mst_node **mst, void **cor) -+{ -+ const int sz = c->mst_node_alsz; -+ int err, offs, len; -+ void *sbuf, *buf; -+ -+ sbuf = vmalloc(c->leb_size); -+ if (!sbuf) -+ return -ENOMEM; -+ -+ err = ubi_read(c->ubi, lnum, sbuf, 0, c->leb_size); -+ if (err && err != -EBADMSG) -+ goto out_free; -+ -+ /* Find the first position that is definitely not a node */ -+ offs = 0; -+ buf = sbuf; -+ len = c->leb_size; -+ while (offs + UBIFS_MST_NODE_SZ <= c->leb_size) { -+ struct ubifs_ch *ch = buf; -+ -+ if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC) -+ break; -+ offs += sz; -+ buf += sz; -+ len -= sz; -+ } -+ /* See if there was a valid master node before that */ -+ if (offs) { -+ int ret; -+ -+ offs -= sz; -+ buf -= sz; -+ len += sz; -+ ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1); -+ if (ret != SCANNED_A_NODE && offs) { -+ /* Could have been corruption so check one place back */ -+ offs -= sz; -+ buf -= sz; -+ len += sz; -+ ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1); -+ if (ret != SCANNED_A_NODE) -+ /* -+ * We accept only one area of corruption because -+ * we are assuming that it was caused while -+ * trying to write a master node. 
-+ */ -+ goto out_err; -+ } -+ if (ret == SCANNED_A_NODE) { -+ struct ubifs_ch *ch = buf; -+ -+ if (ch->node_type != UBIFS_MST_NODE) -+ goto out_err; -+ dbg_rcvry("found a master node at %d:%d", lnum, offs); -+ *mst = buf; -+ offs += sz; -+ buf += sz; -+ len -= sz; -+ } -+ } -+ /* Check for corruption */ -+ if (offs < c->leb_size) { -+ if (!is_empty(buf, min_t(int, len, sz))) { -+ *cor = buf; -+ dbg_rcvry("found corruption at %d:%d", lnum, offs); -+ } -+ offs += sz; -+ buf += sz; -+ len -= sz; -+ } -+ /* Check remaining empty space */ -+ if (offs < c->leb_size) -+ if (!is_empty(buf, len)) -+ goto out_err; -+ *pbuf = sbuf; -+ return 0; -+ -+out_err: -+ err = -EINVAL; -+out_free: -+ vfree(sbuf); -+ *mst = NULL; -+ *cor = NULL; -+ return err; -+} -+ -+/** -+ * write_rcvrd_mst_node - write recovered master node. -+ * @c: UBIFS file-system description object -+ * @mst: master node -+ * -+ * This function returns %0 on success and a negative error code on failure. -+ */ -+static int write_rcvrd_mst_node(struct ubifs_info *c, -+ struct ubifs_mst_node *mst) -+{ -+ int err = 0, lnum = UBIFS_MST_LNUM, sz = c->mst_node_alsz; -+ __le32 save_flags; -+ -+ dbg_rcvry("recovery"); -+ -+ save_flags = mst->flags; -+ mst->flags |= cpu_to_le32(UBIFS_MST_RCVRY); -+ -+ ubifs_prepare_node(c, mst, UBIFS_MST_NODE_SZ, 1); -+ err = ubi_leb_change(c->ubi, lnum, mst, sz, UBI_SHORTTERM); -+ if (err) -+ goto out; -+ err = ubi_leb_change(c->ubi, lnum + 1, mst, sz, UBI_SHORTTERM); -+ if (err) -+ goto out; -+out: -+ mst->flags = save_flags; -+ return err; -+} -+ -+/** -+ * ubifs_recover_master_node - recover the master node. -+ * @c: UBIFS file-system description object -+ * -+ * This function recovers the master node from corruption that may occur due to -+ * an unclean unmount. -+ * -+ * This function returns %0 on success and a negative error code on failure. 
-+ */ -+int ubifs_recover_master_node(struct ubifs_info *c) -+{ -+ void *buf1 = NULL, *buf2 = NULL, *cor1 = NULL, *cor2 = NULL; -+ struct ubifs_mst_node *mst1 = NULL, *mst2 = NULL, *mst; -+ const int sz = c->mst_node_alsz; -+ int err, offs1, offs2; -+ -+ dbg_rcvry("recovery"); -+ -+ err = get_master_node(c, UBIFS_MST_LNUM, &buf1, &mst1, &cor1); -+ if (err) -+ goto out_free; -+ -+ err = get_master_node(c, UBIFS_MST_LNUM + 1, &buf2, &mst2, &cor2); -+ if (err) -+ goto out_free; -+ -+ if (mst1) { -+ offs1 = (void *)mst1 - buf1; -+ if ((le32_to_cpu(mst1->flags) & UBIFS_MST_RCVRY) && -+ (offs1 == 0 && !cor1)) { -+ /* -+ * mst1 was written by recovery at offset 0 with no -+ * corruption. -+ */ -+ dbg_rcvry("recovery recovery"); -+ mst = mst1; -+ } else if (mst2) { -+ offs2 = (void *)mst2 - buf2; -+ if (offs1 == offs2) { -+ /* Same offset, so must be the same */ -+ if (memcmp((void *)mst1 + UBIFS_CH_SZ, -+ (void *)mst2 + UBIFS_CH_SZ, -+ UBIFS_MST_NODE_SZ - UBIFS_CH_SZ)) -+ goto out_err; -+ mst = mst1; -+ } else if (offs2 + sz == offs1) { -+ /* 1st LEB was written, 2nd was not */ -+ if (cor1) -+ goto out_err; -+ mst = mst1; -+ } else if (offs1 == 0 && offs2 + sz >= c->leb_size) { -+ /* 1st LEB was unmapped and written, 2nd not */ -+ if (cor1) -+ goto out_err; -+ mst = mst1; -+ } else -+ goto out_err; -+ } else { -+ /* -+ * 2nd LEB was unmapped and about to be written, so -+ * there must be only one master node in the first LEB -+ * and no corruption. -+ */ -+ if (offs1 != 0 || cor1) -+ goto out_err; -+ mst = mst1; -+ } -+ } else { -+ if (!mst2) -+ goto out_err; -+ /* -+ * 1st LEB was unmapped and about to be written, so there must -+ * be no room left in 2nd LEB. -+ */ -+ offs2 = (void *)mst2 - buf2; -+ if (offs2 + sz + sz <= c->leb_size) -+ goto out_err; -+ mst = mst2; -+ } -+ -+ dbg_rcvry("recovered master node from LEB %d", -+ (mst == mst1 ? 
UBIFS_MST_LNUM : UBIFS_MST_LNUM + 1)); -+ -+ memcpy(c->mst_node, mst, UBIFS_MST_NODE_SZ); -+ -+ if ((c->vfs_sb->s_flags & MS_RDONLY)) { -+ /* Read-only mode. Keep a copy for switching to rw mode */ -+ c->rcvrd_mst_node = kmalloc(sz, GFP_KERNEL); -+ if (!c->rcvrd_mst_node) { -+ err = -ENOMEM; -+ goto out_free; -+ } -+ memcpy(c->rcvrd_mst_node, c->mst_node, UBIFS_MST_NODE_SZ); -+ } else { -+ /* Write the recovered master node */ -+ c->max_sqnum = le64_to_cpu(mst->ch.sqnum) - 1; -+ err = write_rcvrd_mst_node(c, c->mst_node); -+ if (err) -+ goto out_free; -+ } -+ -+ vfree(buf2); -+ vfree(buf1); -+ -+ return 0; -+ -+out_err: -+ err = -EINVAL; -+out_free: -+ ubifs_err("failed to recover master node"); -+ if (mst1) { -+ dbg_err("dumping first master node"); -+ dbg_dump_node(c, mst1); -+ } -+ if (mst2) { -+ dbg_err("dumping second master node"); -+ dbg_dump_node(c, mst2); -+ } -+ vfree(buf2); -+ vfree(buf1); -+ return err; -+} -+ -+/** -+ * ubifs_write_rcvrd_mst_node - write the recovered master node. -+ * @c: UBIFS file-system description object -+ * -+ * This function writes the master node that was recovered during mounting in -+ * read-only mode and must now be written because we are remounting rw. -+ * -+ * This function returns %0 on success and a negative error code on failure. -+ */ -+int ubifs_write_rcvrd_mst_node(struct ubifs_info *c) -+{ -+ int err; -+ -+ if (!c->rcvrd_mst_node) -+ return 0; -+ c->rcvrd_mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY); -+ c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY); -+ err = write_rcvrd_mst_node(c, c->rcvrd_mst_node); -+ if (err) -+ return err; -+ kfree(c->rcvrd_mst_node); -+ c->rcvrd_mst_node = NULL; -+ return 0; -+} -+ -+/** -+ * is_last_write - determine if an offset was in the last write to a LEB. 
-+ * @c: UBIFS file-system description object -+ * @buf: buffer to check -+ * @offs: offset to check -+ * -+ * This function returns %1 if @offs was in the last write to the LEB whose data -+ * is in @buf, otherwise %0 is returned. The determination is made by checking -+ * for subsequent empty space starting from the next min_io_size boundary (or a -+ * bit less than the common header size if min_io_size is one). -+ */ -+static int is_last_write(const struct ubifs_info *c, void *buf, int offs) -+{ -+ int empty_offs; -+ int check_len; -+ uint8_t *p; -+ -+ if (c->min_io_size == 1) { -+ check_len = c->leb_size - offs; -+ p = buf + check_len; -+ for (; check_len > 0; check_len--) -+ if (*--p != 0xff) -+ break; -+ /* -+ * 'check_len' is the size of the corruption which cannot be -+ * more than the size of 1 node if it was caused by an unclean -+ * unmount. -+ */ -+ if (check_len > UBIFS_MAX_NODE_SZ) -+ return 0; -+ return 1; -+ } -+ -+ /* -+ * Round up to the next c->min_io_size boundary i.e. 'offs' is in the -+ * last wbuf written. After that should be empty space. -+ */ -+ empty_offs = ALIGN(offs + 1, c->min_io_size); -+ check_len = c->leb_size - empty_offs; -+ p = buf + empty_offs - offs; -+ -+ for (; check_len > 0; check_len--) -+ if (*p++ != 0xff) -+ return 0; -+ return 1; -+} -+ -+/** -+ * clean_buf - clean the data from an LEB sitting in a buffer. -+ * @c: UBIFS file-system description object -+ * @buf: buffer to clean -+ * @lnum: LEB number to clean -+ * @offs: offset from which to clean -+ * @len: length of buffer -+ * -+ * This function pads up to the next min_io_size boundary (if there is one) and -+ * sets empty space to all 0xff. @buf, @offs and @len are updated to the next -+ * min_io_size boundary (if there is one). 
-+ */ -+static void clean_buf(const struct ubifs_info *c, void **buf, int lnum, -+ int *offs, int *len) -+{ -+ int empty_offs, pad_len; -+ -+ lnum = lnum; -+ dbg_rcvry("cleaning corruption at %d:%d", lnum, *offs); -+ -+ if (c->min_io_size == 1) { -+ memset(*buf, 0xff, c->leb_size - *offs); -+ return; -+ } -+ -+ ubifs_assert(!(*offs & 7)); -+ empty_offs = ALIGN(*offs, c->min_io_size); -+ pad_len = empty_offs - *offs; -+ ubifs_pad(c, *buf, pad_len); -+ *offs += pad_len; -+ *buf += pad_len; -+ *len -= pad_len; -+ memset(*buf, 0xff, c->leb_size - empty_offs); -+} -+ -+/** -+ * no_more_nodes - determine if there are no more nodes in a buffer. -+ * @c: UBIFS file-system description object -+ * @buf: buffer to check -+ * @len: length of buffer -+ * @lnum: LEB number of the LEB from which @buf was read -+ * @offs: offset from which @buf was read -+ * -+ * This function scans @buf for more nodes and returns %0 if a node is found and -+ * %1 if no more nodes are found. If the corrupt node at @offs looks like a data -+ * node, a node-like match that lies entirely inside its extent is treated as -+ * file data and skipped rather than reported. -+ */ -+static int no_more_nodes(const struct ubifs_info *c, void *buf, int len, -+ int lnum, int offs) -+{ -+ int skip, next_offs = 0; -+ -+ if (len > UBIFS_DATA_NODE_SZ) { -+ struct ubifs_ch *ch = buf; -+ int dlen = le32_to_cpu(ch->len); -+ -+ if (ch->node_type == UBIFS_DATA_NODE && dlen >= UBIFS_CH_SZ && -+ dlen <= UBIFS_MAX_DATA_NODE_SZ) -+ /* The corrupt node looks like a data node */ -+ next_offs = ALIGN(offs + dlen, 8); -+ } -+ -+ if (c->min_io_size == 1) -+ skip = 8; -+ else -+ skip = ALIGN(offs + 1, c->min_io_size) - offs; -+ -+ offs += skip; -+ buf += skip; -+ len -= skip; -+ while (len > 8) { -+ struct ubifs_ch *ch = buf; -+ uint32_t magic = le32_to_cpu(ch->magic); -+ int ret; -+ -+ if (magic == UBIFS_NODE_MAGIC) { -+ ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1); -+ if (ret == SCANNED_A_NODE || ret > 0) { -+ /* -+ * There is a small chance this is just data in -+ * a data node, so check that possibility. e.g. -+ * this is part of a file that itself contains -+ * a UBIFS image.
-+ */ -+ if (!next_offs || offs + le32_to_cpu(ch->len) > -+ next_offs) { -+ dbg_rcvry("unexpected node at %d:%d", -+ lnum, offs); -+ return 0; -+ } -+ /* -+ * Embedded in the corrupt data node - fall through and -+ * step past it, otherwise the same offset would be -+ * rescanned forever. -+ */ -+ } -+ } -+ offs += 8; -+ buf += 8; -+ len -= 8; -+ } -+ return 1; -+} -+ -+/** -+ * fix_unclean_leb - fix an unclean LEB. -+ * @c: UBIFS file-system description object -+ * @sleb: scanned LEB information -+ * @start: offset where scan started -+ */ -+static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb, -+ int start) -+{ -+ int lnum = sleb->lnum, endpt = start; -+ -+ /* Get the end offset of the last node we are keeping */ -+ if (!list_empty(&sleb->nodes)) { -+ struct ubifs_scan_node *snod; -+ -+ snod = list_entry(sleb->nodes.prev, -+ struct ubifs_scan_node, list); -+ endpt = snod->offs + snod->len; -+ } -+ -+ if ((c->vfs_sb->s_flags & MS_RDONLY) && !c->remounting_rw) { -+ /* Add to recovery list */ -+ struct ubifs_unclean_leb *ucleb; -+ -+ dbg_rcvry("need to fix LEB %d start %d endpt %d", -+ lnum, start, sleb->endpt); -+ ucleb = kzalloc(sizeof(struct ubifs_unclean_leb), GFP_NOFS); -+ if (!ucleb) -+ return -ENOMEM; -+ ucleb->lnum = lnum; -+ ucleb->endpt = endpt; -+ list_add_tail(&ucleb->list, &c->unclean_leb_list); -+ } else { -+ /* Write the fixed LEB back to flash */ -+ int err; -+ -+ dbg_rcvry("fixing LEB %d start %d endpt %d", -+ lnum, start, sleb->endpt); -+ if (endpt == 0) { -+ err = ubifs_leb_unmap(c, lnum); -+ if (err) -+ return err; -+ } else { -+ int len = ALIGN(endpt, c->min_io_size); -+ -+ if (start) { -+ err = ubi_read(c->ubi, lnum, sleb->buf, 0, -+ start); -+ if (err) -+ return err; -+ } -+ /* Pad to min_io_size */ -+ if (len > endpt) { -+ int pad_len = len - ALIGN(endpt, 8); -+ -+ if (pad_len > 0) { -+ void *buf = sleb->buf + len - pad_len; -+ -+ ubifs_pad(c, buf, pad_len); -+ } -+ } -+ err = ubi_leb_change(c->ubi, lnum, sleb->buf, len, -+ UBI_UNKNOWN); -+ if (err) -+ return err; -+ } -+ } -+ return 0; -+} -+ -+/** -+ * drop_incomplete_group - drop nodes from an incomplete
group. -+ * @sleb: scanned LEB information -+ * @offs: offset of dropped nodes is returned here -+ * -+ * This function returns %1 if nodes are dropped and %0 otherwise. -+ */ -+static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs) -+{ -+ int dropped = 0; -+ -+ while (!list_empty(&sleb->nodes)) { -+ struct ubifs_scan_node *snod; -+ struct ubifs_ch *ch; -+ -+ snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, -+ list); -+ ch = snod->node; -+ if (ch->group_type != UBIFS_IN_NODE_GROUP) -+ return dropped; -+ dbg_rcvry("dropping node at %d:%d", sleb->lnum, snod->offs); -+ *offs = snod->offs; -+ list_del(&snod->list); -+ kfree(snod); -+ sleb->nodes_cnt -= 1; -+ dropped = 1; -+ } -+ return dropped; -+} -+ -+/** -+ * ubifs_recover_leb - scan and recover a LEB. -+ * @c: UBIFS file-system description object -+ * @lnum: LEB number -+ * @offs: offset -+ * @sbuf: LEB-sized buffer to use -+ * @grouped: nodes may be grouped for recovery -+ * -+ * This function does a scan of a LEB, but caters for errors that might have -+ * been caused by the unclean unmount from which we are attempting to recover. -+ * -+ * This function returns the scanned LEB information on success and a negative -+ * error code encoded as an error pointer on failure.
-+ */ -+struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, -+ int offs, void *sbuf, int grouped) -+{ -+ int err, len = c->leb_size - offs, need_clean = 0, quiet = 1; -+ int empty_chkd = 0, start = offs; -+ struct ubifs_scan_leb *sleb; -+ void *buf = sbuf + offs; -+ -+ dbg_rcvry("%d:%d", lnum, offs); -+ -+ sleb = ubifs_start_scan(c, lnum, offs, sbuf); -+ if (IS_ERR(sleb)) -+ return sleb; -+ -+ if (sleb->ecc) -+ need_clean = 1; -+ -+ while (len >= 8) { -+ int ret; -+ -+ dbg_scan("look at LEB %d:%d (%d bytes left)", -+ lnum, offs, len); -+ -+ cond_resched(); -+ -+ /* -+ * Scan quietly until there is an error from which we cannot -+ * recover -+ */ -+ ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet); -+ -+ if (ret == SCANNED_A_NODE) { -+ /* A valid node, and not a padding node */ -+ struct ubifs_ch *ch = buf; -+ int node_len; -+ -+ err = ubifs_add_snod(c, sleb, buf, offs); -+ if (err) -+ goto error; -+ node_len = ALIGN(le32_to_cpu(ch->len), 8); -+ offs += node_len; -+ buf += node_len; -+ len -= node_len; -+ continue; -+ } -+ -+ if (ret > 0) { -+ /* Padding bytes or a valid padding node */ -+ offs += ret; -+ buf += ret; -+ len -= ret; -+ continue; -+ } -+ -+ if (ret == SCANNED_EMPTY_SPACE) { -+ if (!is_empty(buf, len)) { -+ if (!is_last_write(c, buf, offs)) -+ break; -+ clean_buf(c, &buf, lnum, &offs, &len); -+ need_clean = 1; -+ } -+ empty_chkd = 1; -+ break; -+ } -+ -+ if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE) -+ if (is_last_write(c, buf, offs)) { -+ clean_buf(c, &buf, lnum, &offs, &len); -+ need_clean = 1; -+ empty_chkd = 1; -+ break; -+ } -+ -+ if (ret == SCANNED_A_CORRUPT_NODE) -+ if (no_more_nodes(c, buf, len, lnum, offs)) { -+ clean_buf(c, &buf, lnum, &offs, &len); -+ need_clean = 1; -+ empty_chkd = 1; -+ break; -+ } -+ -+ if (quiet) { -+ /* Redo the last scan but noisily */ -+ quiet = 0; -+ continue; -+ } -+ -+ switch (ret) { -+ case SCANNED_GARBAGE: -+ dbg_err("garbage"); -+ goto corrupted; -+ case 
SCANNED_A_CORRUPT_NODE: -+ case SCANNED_A_BAD_PAD_NODE: -+ dbg_err("bad node"); -+ goto corrupted; -+ default: -+ dbg_err("unknown"); -+ goto corrupted; -+ } -+ } -+ -+ if (!empty_chkd && !is_empty(buf, len)) { -+ if (is_last_write(c, buf, offs)) { -+ clean_buf(c, &buf, lnum, &offs, &len); -+ need_clean = 1; -+ } else { -+ ubifs_err("corrupt empty space at LEB %d:%d", -+ lnum, offs); -+ goto corrupted; -+ } -+ } -+ -+ /* Drop nodes from incomplete group */ -+ if (grouped && drop_incomplete_group(sleb, &offs)) { -+ buf = sbuf + offs; -+ len = c->leb_size - offs; -+ clean_buf(c, &buf, lnum, &offs, &len); -+ need_clean = 1; -+ } -+ -+ if (offs % c->min_io_size) { -+ clean_buf(c, &buf, lnum, &offs, &len); -+ need_clean = 1; -+ } -+ -+ ubifs_end_scan(c, sleb, lnum, offs); -+ -+ if (need_clean) { -+ err = fix_unclean_leb(c, sleb, start); -+ if (err) -+ goto error; -+ } -+ -+ return sleb; -+ -+corrupted: -+ ubifs_scanned_corruption(c, lnum, offs, buf); -+ err = -EUCLEAN; -+error: -+ ubifs_err("LEB %d scanning failed", lnum); -+ ubifs_scan_destroy(sleb); -+ return ERR_PTR(err); -+} -+ -+/** -+ * get_cs_sqnum - get commit start sequence number. -+ * @c: UBIFS file-system description object -+ * @lnum: LEB number of commit start node -+ * @offs: offset of commit start node -+ * @cs_sqnum: commit start sequence number is returned here -+ * -+ * This function returns %0 on success and a negative error code on failure. 
-+ */ -+static int get_cs_sqnum(struct ubifs_info *c, int lnum, int offs, -+ unsigned long long *cs_sqnum) -+{ -+ struct ubifs_cs_node *cs_node = NULL; -+ int err, ret; -+ -+ dbg_rcvry("at %d:%d", lnum, offs); -+ cs_node = kmalloc(UBIFS_CS_NODE_SZ, GFP_KERNEL); -+ if (!cs_node) -+ return -ENOMEM; -+ if (c->leb_size - offs < UBIFS_CS_NODE_SZ) -+ goto out_err; -+ err = ubi_read(c->ubi, lnum, (void *)cs_node, offs, UBIFS_CS_NODE_SZ); -+ if (err && err != -EBADMSG) -+ goto out_free; -+ ret = ubifs_scan_a_node(c, cs_node, UBIFS_CS_NODE_SZ, lnum, offs, 0); -+ if (ret != SCANNED_A_NODE) { -+ dbg_err("Not a valid node"); -+ goto out_err; -+ } -+ if (cs_node->ch.node_type != UBIFS_CS_NODE) { -+ dbg_err("Node a CS node, type is %d", cs_node->ch.node_type); -+ goto out_err; -+ } -+ if (le64_to_cpu(cs_node->cmt_no) != c->cmt_no) { -+ dbg_err("CS node cmt_no %llu != current cmt_no %llu", -+ (unsigned long long)le64_to_cpu(cs_node->cmt_no), -+ c->cmt_no); -+ goto out_err; -+ } -+ *cs_sqnum = le64_to_cpu(cs_node->ch.sqnum); -+ dbg_rcvry("commit start sqnum %llu", *cs_sqnum); -+ kfree(cs_node); -+ return 0; -+ -+out_err: -+ err = -EINVAL; -+out_free: -+ ubifs_err("failed to get CS sqnum"); -+ kfree(cs_node); -+ return err; -+} -+ -+/** -+ * ubifs_recover_log_leb - scan and recover a log LEB. -+ * @c: UBIFS file-system description object -+ * @lnum: LEB number -+ * @offs: offset -+ * @sbuf: LEB-sized buffer to use -+ * -+ * This function does a scan of a LEB, but caters for errors that might have -+ * been caused by the unclean unmount from which we are attempting to recover. -+ * -+ * This function returns the scanned LEB information on success and a negative -+ * error code encoded as an error pointer on failure.
-+ */ -+struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, -+ int offs, void *sbuf) -+{ -+ struct ubifs_scan_leb *sleb; -+ int next_lnum; -+ -+ dbg_rcvry("LEB %d", lnum); -+ next_lnum = lnum + 1; -+ if (next_lnum >= UBIFS_LOG_LNUM + c->log_lebs) -+ next_lnum = UBIFS_LOG_LNUM; -+ if (next_lnum != c->ltail_lnum) { -+ /* -+ * We can only recover at the end of the log, so check that the -+ * next log LEB is empty or out of date. -+ */ -+ sleb = ubifs_scan(c, next_lnum, 0, sbuf); -+ if (IS_ERR(sleb)) -+ return sleb; -+ if (sleb->nodes_cnt) { -+ struct ubifs_scan_node *snod; -+ unsigned long long cs_sqnum = c->cs_sqnum; -+ -+ snod = list_entry(sleb->nodes.next, -+ struct ubifs_scan_node, list); -+ if (cs_sqnum == 0) { -+ int err; -+ -+ err = get_cs_sqnum(c, lnum, offs, &cs_sqnum); -+ if (err) { -+ ubifs_scan_destroy(sleb); -+ return ERR_PTR(err); -+ } -+ } -+ if (snod->sqnum > cs_sqnum) { -+ ubifs_err("unrecoverable log corruption " -+ "in LEB %d", lnum); -+ ubifs_scan_destroy(sleb); -+ return ERR_PTR(-EUCLEAN); -+ } -+ } -+ ubifs_scan_destroy(sleb); -+ } -+ return ubifs_recover_leb(c, lnum, offs, sbuf, 0); -+} -+ -+/** -+ * recover_head - recover a head. -+ * @c: UBIFS file-system description object -+ * @lnum: LEB number of head to recover -+ * @offs: offset of head to recover -+ * @sbuf: LEB-sized buffer to use -+ * -+ * This function ensures that there is no data on the flash at a head location. -+ * -+ * This function returns %0 on success and a negative error code on failure. 
-+ */ -+static int recover_head(const struct ubifs_info *c, int lnum, int offs, -+ void *sbuf) -+{ -+ int len, err, need_clean = 0; -+ -+ if (c->min_io_size > 1) -+ len = c->min_io_size; -+ else -+ len = 512; -+ if (offs + len > c->leb_size) -+ len = c->leb_size - offs; -+ -+ if (!len) -+ return 0; -+ -+ /* Read at the head location and check it is empty flash */ -+ err = ubi_read(c->ubi, lnum, sbuf, offs, len); -+ if (err) -+ need_clean = 1; -+ else { -+ uint8_t *p = sbuf; -+ -+ while (len--) -+ if (*p++ != 0xff) { -+ need_clean = 1; -+ break; -+ } -+ } -+ -+ if (need_clean) { -+ dbg_rcvry("cleaning head at %d:%d", lnum, offs); -+ if (offs == 0) -+ return ubifs_leb_unmap(c, lnum); -+ err = ubi_read(c->ubi, lnum, sbuf, 0, offs); -+ if (err) -+ return err; -+ return ubi_leb_change(c->ubi, lnum, sbuf, offs, UBI_UNKNOWN); -+ } -+ -+ return 0; -+} -+ -+/** -+ * ubifs_recover_inl_heads - recover index and LPT heads. -+ * @c: UBIFS file-system description object -+ * @sbuf: LEB-sized buffer to use -+ * -+ * This function ensures that there is no data on the flash at the index and -+ * LPT head locations. -+ * -+ * This deals with the recovery of a half-completed journal commit. UBIFS is -+ * careful never to overwrite the last version of the index or the LPT. Because -+ * the index and LPT are wandering trees, data from a half-completed commit will -+ * not be referenced anywhere in UBIFS. The data will be either in LEBs that are -+ * assumed to be empty and will be unmapped anyway before use, or in the index -+ * and LPT heads. -+ * -+ * This function returns %0 on success and a negative error code on failure. 
-+ */ -+int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf) -+{ -+ int err; -+ -+ ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY) || c->remounting_rw); -+ -+ dbg_rcvry("checking index head at %d:%d", c->ihead_lnum, c->ihead_offs); -+ err = recover_head(c, c->ihead_lnum, c->ihead_offs, sbuf); -+ if (err) -+ return err; -+ -+ dbg_rcvry("checking LPT head at %d:%d", c->nhead_lnum, c->nhead_offs); -+ err = recover_head(c, c->nhead_lnum, c->nhead_offs, sbuf); -+ if (err) -+ return err; -+ -+ return 0; -+} -+ -+/** -+ * clean_an_unclean_leb - read and write a LEB to remove corruption. -+ * @c: UBIFS file-system description object -+ * @ucleb: unclean LEB information -+ * @sbuf: LEB-sized buffer to use -+ * -+ * This function reads a LEB up to a point pre-determined by the mount recovery, -+ * checks the nodes, and writes the result back to the flash, thereby cleaning -+ * off any following corruption, or non-fatal ECC errors. -+ * -+ * This function returns %0 on success and a negative error code on failure. 
-+ */ -+static int clean_an_unclean_leb(const struct ubifs_info *c, -+ struct ubifs_unclean_leb *ucleb, void *sbuf) -+{ -+ int err, lnum = ucleb->lnum, offs = 0, len = ucleb->endpt, quiet = 1; -+ void *buf = sbuf; -+ -+ dbg_rcvry("LEB %d len %d", lnum, len); -+ -+ if (len == 0) { -+ /* Nothing to read, just unmap it */ -+ err = ubifs_leb_unmap(c, lnum); -+ if (err) -+ return err; -+ return 0; -+ } -+ -+ /* An ECC error (-EBADMSG) is tolerated here; the nodes are checked below */ -+ err = ubi_read(c->ubi, lnum, buf, offs, len); -+ if (err && err != -EBADMSG) -+ return err; -+ -+ while (len >= 8) { -+ int ret; -+ -+ cond_resched(); -+ -+ /* Scan quietly until there is an error */ -+ ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet); -+ -+ if (ret == SCANNED_A_NODE) { -+ /* A valid node, and not a padding node */ -+ struct ubifs_ch *ch = buf; -+ int node_len; -+ -+ node_len = ALIGN(le32_to_cpu(ch->len), 8); -+ offs += node_len; -+ buf += node_len; -+ len -= node_len; -+ continue; -+ } -+ -+ if (ret > 0) { -+ /* Padding bytes or a valid padding node */ -+ offs += ret; -+ buf += ret; -+ len -= ret; -+ continue; -+ } -+ -+ if (ret == SCANNED_EMPTY_SPACE) { -+ ubifs_err("unexpected empty space at %d:%d", -+ lnum, offs); -+ return -EUCLEAN; -+ } -+ -+ if (quiet) { -+ /* Redo the last scan but noisily */ -+ quiet = 0; -+ continue; -+ } -+ -+ ubifs_scanned_corruption(c, lnum, offs, buf); -+ return -EUCLEAN; -+ } -+ -+ /* Pad to min_io_size */ -+ len = ALIGN(ucleb->endpt, c->min_io_size); -+ if (len > ucleb->endpt) { -+ int pad_len = len - ALIGN(ucleb->endpt, 8); -+ -+ if (pad_len > 0) { -+ /* Pad @sbuf, the buffer that was read and is written back */ -+ buf = sbuf + len - pad_len; -+ ubifs_pad(c, buf, pad_len); -+ } -+ } -+ -+ /* Write back the LEB atomically */ -+ err = ubi_leb_change(c->ubi, lnum, sbuf, len, UBI_UNKNOWN); -+ if (err) -+ return err; -+ -+ dbg_rcvry("cleaned LEB %d", lnum); -+ -+ return 0; -+} -+ -+/** -+ * ubifs_clean_lebs - clean LEBs recovered during read-only mount.
-+ * @c: UBIFS file-system description object -+ * @sbuf: LEB-sized buffer to use -+ * -+ * This function cleans a LEB identified during recovery that needs to be -+ * written but was not because UBIFS was mounted read-only. This happens when -+ * remounting to read-write mode. -+ * -+ * This function returns %0 on success and a negative error code on failure. -+ */ -+int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf) -+{ -+ dbg_rcvry("recovery"); -+ while (!list_empty(&c->unclean_leb_list)) { -+ struct ubifs_unclean_leb *ucleb; -+ int err; -+ -+ ucleb = list_entry(c->unclean_leb_list.next, -+ struct ubifs_unclean_leb, list); -+ err = clean_an_unclean_leb(c, ucleb, sbuf); -+ if (err) -+ return err; -+ list_del(&ucleb->list); -+ kfree(ucleb); -+ } -+ return 0; -+} -+ -+/** -+ * ubifs_rcvry_gc_commit - recover the GC LEB number and run the commit. -+ * @c: UBIFS file-system description object -+ * -+ * Out-of-place garbage collection requires always one empty LEB with which to -+ * start garbage collection. The LEB number is recorded in c->gc_lnum and is -+ * written to the master node on unmounting. In the case of an unclean unmount -+ * the value of gc_lnum recorded in the master node is out of date and cannot -+ * be used. Instead, recovery must allocate an empty LEB for this purpose. -+ * However, there may not be enough empty space, in which case it must be -+ * possible to GC the dirtiest LEB into the GC head LEB. -+ * -+ * This function also runs the commit which causes the TNC updates from -+ * size-recovery and orphans to be written to the flash. That is important to -+ * ensure correct replay order for subsequent mounts. -+ * -+ * This function returns %0 on success and a negative error code on failure. 
-+ */ -+int ubifs_rcvry_gc_commit(struct ubifs_info *c) -+{ -+ struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; -+ struct ubifs_lprops lp; -+ int lnum, err; -+ -+ c->gc_lnum = -1; -+ if (wbuf->lnum == -1) { -+ dbg_rcvry("no GC head LEB"); -+ goto find_free; -+ } -+ /* -+ * See whether the used space in the dirtiest LEB fits in the GC head -+ * LEB. -+ */ -+ if (wbuf->offs == c->leb_size) { -+ dbg_rcvry("no room in GC head LEB"); -+ goto find_free; -+ } -+ err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2); -+ if (err) { -+ if (err == -ENOSPC) -+ dbg_err("could not find a dirty LEB"); -+ return err; -+ } -+ ubifs_assert(!(lp.flags & LPROPS_INDEX)); -+ lnum = lp.lnum; -+ if (lp.free + lp.dirty == c->leb_size) { -+ /* An empty LEB was returned */ -+ if (lp.free != c->leb_size) { -+ err = ubifs_change_one_lp(c, lnum, c->leb_size, -+ 0, 0, 0, 0); -+ if (err) -+ return err; -+ } -+ err = ubifs_leb_unmap(c, lnum); -+ if (err) -+ return err; -+ c->gc_lnum = lnum; -+ dbg_rcvry("allocated LEB %d for GC", lnum); -+ /* Run the commit */ -+ dbg_rcvry("committing"); -+ return ubifs_run_commit(c); -+ } -+ /* -+ * There was no empty LEB so the used space in the dirtiest LEB must fit -+ * in the GC head LEB. -+ */ -+ if (lp.free + lp.dirty < wbuf->offs) { -+ dbg_rcvry("LEB %d doesn't fit in GC head LEB %d:%d", -+ lnum, wbuf->lnum, wbuf->offs); -+ err = ubifs_return_leb(c, lnum); -+ if (err) -+ return err; -+ goto find_free; -+ } -+ /* -+ * We run the commit before garbage collection otherwise subsequent -+ * mounts will see the GC and orphan deletion in a different order. -+ */ -+ dbg_rcvry("committing"); -+ err = ubifs_run_commit(c); -+ if (err) -+ return err; -+ /* -+ * The data in the dirtiest LEB fits in the GC head LEB, so do the GC -+ * - use locking to keep 'ubifs_assert()' happy. 
-+ */ -+ dbg_rcvry("GC'ing LEB %d", lnum); -+ mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); -+ err = ubifs_garbage_collect_leb(c, &lp); -+ if (err >= 0) { -+ int err2 = ubifs_wbuf_sync_nolock(wbuf); -+ -+ if (err2) -+ err = err2; -+ } -+ mutex_unlock(&wbuf->io_mutex); -+ if (err < 0) { -+ dbg_err("GC failed, error %d", err); -+ if (err == -EAGAIN) -+ err = -EINVAL; -+ return err; -+ } -+ if (err != LEB_RETAINED) { -+ dbg_err("GC returned %d", err); -+ return -EINVAL; -+ } -+ err = ubifs_leb_unmap(c, c->gc_lnum); -+ if (err) -+ return err; -+ dbg_rcvry("allocated LEB %d for GC", lnum); -+ return 0; -+ -+find_free: -+ /* -+ * There is no GC head LEB or the free space in the GC head LEB is too -+ * small. Allocate gc_lnum by calling 'ubifs_find_free_leb_for_idx()' so -+ * GC is not run. -+ */ -+ lnum = ubifs_find_free_leb_for_idx(c); -+ if (lnum < 0) { -+ dbg_err("could not find an empty LEB"); -+ return lnum; -+ } -+ /* And reset the index flag */ -+ err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0, -+ LPROPS_INDEX, 0); -+ if (err) -+ return err; -+ c->gc_lnum = lnum; -+ dbg_rcvry("allocated LEB %d for GC", lnum); -+ /* Run the commit */ -+ dbg_rcvry("committing"); -+ return ubifs_run_commit(c); -+} -+ -+/** -+ * struct size_entry - inode size information for recovery. -+ * @rb: link in the RB-tree of sizes -+ * @inum: inode number -+ * @i_size: size on inode -+ * @d_size: maximum size based on data nodes -+ * @exists: indicates whether the inode exists -+ * @inode: inode if pinned in memory awaiting rw mode to fix it -+ */ -+struct size_entry { -+ struct rb_node rb; -+ ino_t inum; -+ loff_t i_size; -+ loff_t d_size; -+ int exists; -+ struct inode *inode; -+}; -+ -+/** -+ * add_ino - add an entry to the size tree. 
-+ * @c: UBIFS file-system description object -+ * @inum: inode number -+ * @i_size: size on inode -+ * @d_size: maximum size based on data nodes -+ * @exists: indicates whether the inode exists -+ */ -+static int add_ino(struct ubifs_info *c, ino_t inum, loff_t i_size, -+ loff_t d_size, int exists) -+{ -+ struct rb_node **p = &c->size_tree.rb_node, *parent = NULL; -+ struct size_entry *e; -+ -+ while (*p) { -+ parent = *p; -+ e = rb_entry(parent, struct size_entry, rb); -+ if (inum < e->inum) -+ p = &(*p)->rb_left; -+ else -+ p = &(*p)->rb_right; -+ } -+ -+ e = kzalloc(sizeof(struct size_entry), GFP_KERNEL); -+ if (!e) -+ return -ENOMEM; -+ -+ e->inum = inum; -+ e->i_size = i_size; -+ e->d_size = d_size; -+ e->exists = exists; -+ -+ rb_link_node(&e->rb, parent, p); -+ rb_insert_color(&e->rb, &c->size_tree); -+ -+ return 0; -+} -+ -+/** -+ * find_ino - find an entry on the size tree. -+ * @c: UBIFS file-system description object -+ * @inum: inode number -+ */ -+static struct size_entry *find_ino(struct ubifs_info *c, ino_t inum) -+{ -+ struct rb_node *p = c->size_tree.rb_node; -+ struct size_entry *e; -+ -+ while (p) { -+ e = rb_entry(p, struct size_entry, rb); -+ if (inum < e->inum) -+ p = p->rb_left; -+ else if (inum > e->inum) -+ p = p->rb_right; -+ else -+ return e; -+ } -+ return NULL; -+} -+ -+/** -+ * remove_ino - remove an entry from the size tree. -+ * @c: UBIFS file-system description object -+ * @inum: inode number -+ */ -+static void remove_ino(struct ubifs_info *c, ino_t inum) -+{ -+ struct size_entry *e = find_ino(c, inum); -+ -+ if (!e) -+ return; -+ rb_erase(&e->rb, &c->size_tree); -+ kfree(e); -+} -+ -+/** -+ * ubifs_destroy_size_tree - free resources related to the size tree. 
-+ * @c: UBIFS file-system description object -+ */ -+void ubifs_destroy_size_tree(struct ubifs_info *c) -+{ -+ struct rb_node *this = c->size_tree.rb_node; -+ struct size_entry *e; -+ -+ while (this) { -+ if (this->rb_left) { -+ this = this->rb_left; -+ continue; -+ } else if (this->rb_right) { -+ this = this->rb_right; -+ continue; -+ } -+ e = rb_entry(this, struct size_entry, rb); -+ if (e->inode) -+ iput(e->inode); -+ this = rb_parent(this); -+ if (this) { -+ if (this->rb_left == &e->rb) -+ this->rb_left = NULL; -+ else -+ this->rb_right = NULL; -+ } -+ kfree(e); -+ } -+ c->size_tree = RB_ROOT; -+} -+ -+/** -+ * ubifs_recover_size_accum - accumulate inode sizes for recovery. -+ * @c: UBIFS file-system description object -+ * @key: node key -+ * @deletion: node is for a deletion -+ * @new_size: inode size -+ * -+ * This function has two purposes: -+ * 1) to ensure there are no data nodes that fall outside the inode size -+ * 2) to ensure there are no data nodes for inodes that do not exist -+ * To accomplish those purposes, a rb-tree is constructed containing an entry -+ * for each inode number in the journal that has not been deleted, and recording -+ * the size from the inode node, the maximum size of any data node (also altered -+ * by truncations) and a flag indicating a inode number for which no inode node -+ * was present in the journal. -+ * -+ * Note that there is still the possibility that there are data nodes that have -+ * been committed that are beyond the inode size, however the only way to find -+ * them would be to scan the entire index. Alternatively, some provision could -+ * be made to record the size of inodes at the start of commit, which would seem -+ * very cumbersome for a scenario that is quite unlikely and the only negative -+ * consequence of which is wasted space. -+ * -+ * This functions returns %0 on success and a negative error code on failure. 
-+ */ -+int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key, -+ int deletion, loff_t new_size) -+{ -+ ino_t inum = key_inum(c, key); -+ struct size_entry *e; -+ int err; -+ -+ switch (key_type(c, key)) { -+ case UBIFS_INO_KEY: -+ if (deletion) -+ remove_ino(c, inum); -+ else { -+ e = find_ino(c, inum); -+ if (e) { -+ e->i_size = new_size; -+ e->exists = 1; -+ } else { -+ err = add_ino(c, inum, new_size, 0, 1); -+ if (err) -+ return err; -+ } -+ } -+ break; -+ case UBIFS_DATA_KEY: -+ e = find_ino(c, inum); -+ if (e) { -+ if (new_size > e->d_size) -+ e->d_size = new_size; -+ } else { -+ err = add_ino(c, inum, 0, new_size, 0); -+ if (err) -+ return err; -+ } -+ break; -+ case UBIFS_TRUN_KEY: -+ e = find_ino(c, inum); -+ if (e) -+ e->d_size = new_size; -+ break; -+ } -+ return 0; -+} -+ -+/** -+ * fix_size_in_place - fix inode size in place on flash. -+ * @c: UBIFS file-system description object -+ * @e: inode size information for recovery -+ */ -+static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e) -+{ -+ struct ubifs_ino_node *ino = c->sbuf; -+ unsigned char *p; -+ union ubifs_key key; -+ int err, lnum, offs, len; -+ loff_t i_size; -+ uint32_t crc; -+ -+ /* Locate the inode node LEB number and offset */ -+ ino_key_init(c, &key, e->inum); -+ err = ubifs_tnc_locate(c, &key, ino, &lnum, &offs); -+ if (err) -+ goto out; -+ /* -+ * If the size recorded on the inode node is greater than the size that -+ * was calculated from nodes in the journal then don't change the inode. 
-+ */ -+ i_size = le64_to_cpu(ino->size); -+ if (i_size >= e->d_size) -+ return 0; -+ /* Read the LEB */ -+ err = ubi_read(c->ubi, lnum, c->sbuf, 0, c->leb_size); -+ if (err) -+ goto out; -+ /* Change the size field and recalculate the CRC */ -+ ino = c->sbuf + offs; -+ ino->size = cpu_to_le64(e->d_size); -+ len = le32_to_cpu(ino->ch.len); -+ crc = crc32(UBIFS_CRC32_INIT, (void *)ino + 8, len - 8); -+ ino->ch.crc = cpu_to_le32(crc); -+ /* Work out where data in the LEB ends and free space begins */ -+ p = c->sbuf; -+ len = c->leb_size - 1; -+ while (p[len] == 0xff) -+ len -= 1; -+ len = ALIGN(len + 1, c->min_io_size); -+ /* Atomically write the fixed LEB back again */ -+ err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN); -+ if (err) -+ goto out; -+ dbg_rcvry("inode %lu at %d:%d size %lld -> %lld ", -+ (unsigned long)e->inum, lnum, offs, i_size, e->d_size); -+ return 0; -+ -+out: -+ ubifs_warn("inode %lu failed to fix size %lld -> %lld error %d", -+ (unsigned long)e->inum, e->i_size, e->d_size, err); -+ return err; -+} -+ -+/** -+ * ubifs_recover_size - recover inode size. -+ * @c: UBIFS file-system description object -+ * -+ * This function attempts to fix inode size discrepancies identified by the -+ * 'ubifs_recover_size_accum()' function. -+ * -+ * This functions returns %0 on success and a negative error code on failure. 
-+ */ -+int ubifs_recover_size(struct ubifs_info *c) -+{ -+ struct rb_node *this = rb_first(&c->size_tree); -+ -+ while (this) { -+ struct size_entry *e; -+ int err; -+ -+ e = rb_entry(this, struct size_entry, rb); -+ if (!e->exists) { -+ union ubifs_key key; -+ -+ ino_key_init(c, &key, e->inum); -+ err = ubifs_tnc_lookup(c, &key, c->sbuf); -+ if (err && err != -ENOENT) -+ return err; -+ if (err == -ENOENT) { -+ /* Remove data nodes that have no inode */ -+ dbg_rcvry("removing ino %lu", -+ (unsigned long)e->inum); -+ err = ubifs_tnc_remove_ino(c, e->inum); -+ if (err) -+ return err; -+ } else { -+ struct ubifs_ino_node *ino = c->sbuf; -+ -+ e->exists = 1; -+ e->i_size = le64_to_cpu(ino->size); -+ } -+ } -+ if (e->exists && e->i_size < e->d_size) { -+ if (!e->inode && (c->vfs_sb->s_flags & MS_RDONLY)) { -+ /* Fix the inode size and pin it in memory */ -+ struct inode *inode; -+ -+ inode = ubifs_iget(c->vfs_sb, e->inum); -+ if (IS_ERR(inode)) -+ return PTR_ERR(inode); -+ if (inode->i_size < e->d_size) { -+ dbg_rcvry("ino %lu size %lld -> %lld", -+ (unsigned long)e->inum, -+ e->d_size, inode->i_size); -+ inode->i_size = e->d_size; -+ ubifs_inode(inode)->ui_size = e->d_size; -+ e->inode = inode; -+ this = rb_next(this); -+ continue; -+ } -+ iput(inode); -+ } else { -+ /* Fix the size in place */ -+ err = fix_size_in_place(c, e); -+ if (err) -+ return err; -+ if (e->inode) -+ iput(e->inode); -+ } -+ } -+ this = rb_next(this); -+ rb_erase(&e->rb, &c->size_tree); -+ kfree(e); -+ } -+ return 0; -+} -diff -Nurd linux-2.6.24.orig/fs/ubifs/replay.c linux-2.6.24/fs/ubifs/replay.c ---- linux-2.6.24.orig/fs/ubifs/replay.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.24/fs/ubifs/replay.c 2009-04-17 09:49:28.000000000 +0200 -@@ -0,0 +1,1084 @@ -+/* -+ * This file is part of UBIFS. -+ * -+ * Copyright (C) 2006-2008 Nokia Corporation. 
-+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 as published by -+ * the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -+ * more details. -+ * -+ * You should have received a copy of the GNU General Public License along with -+ * this program; if not, write to the Free Software Foundation, Inc., 51 -+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ * -+ * Authors: Adrian Hunter -+ * Artem Bityutskiy (Битюцкий Артём) -+ */ -+ -+/* -+ * This file contains journal replay code. It runs when the file-system is being -+ * mounted and requires no locking. -+ * -+ * The larger is the journal, the longer it takes to scan it, so the longer it -+ * takes to mount UBIFS. This is why the journal has limited size which may be -+ * changed depending on the system requirements. But a larger journal gives -+ * faster I/O speed because it writes the index less frequently. So this is a -+ * trade-off. Also, the journal is indexed by the in-memory index (TNC), so the -+ * larger is the journal, the more memory its index may consume. -+ */ -+ -+#include "ubifs.h" -+ -+/* -+ * Replay flags. -+ * -+ * REPLAY_DELETION: node was deleted -+ * REPLAY_REF: node is a reference node -+ */ -+enum { -+ REPLAY_DELETION = 1, -+ REPLAY_REF = 2, -+}; -+ -+/** -+ * struct replay_entry - replay tree entry. 
-+ * @lnum: logical eraseblock number of the node -+ * @offs: node offset -+ * @len: node length -+ * @sqnum: node sequence number -+ * @flags: replay flags -+ * @rb: links the replay tree -+ * @key: node key -+ * @nm: directory entry name -+ * @old_size: truncation old size -+ * @new_size: truncation new size -+ * @free: amount of free space in a bud -+ * @dirty: amount of dirty space in a bud from padding and deletion nodes -+ * -+ * UBIFS journal replay must compare node sequence numbers, which means it must -+ * build a tree of node information to insert into the TNC. -+ */ -+struct replay_entry { -+ int lnum; -+ int offs; -+ int len; -+ unsigned long long sqnum; -+ int flags; -+ struct rb_node rb; -+ union ubifs_key key; -+ union { -+ struct qstr nm; -+ struct { -+ loff_t old_size; -+ loff_t new_size; -+ }; -+ struct { -+ int free; -+ int dirty; -+ }; -+ }; -+}; -+ -+/** -+ * struct bud_entry - entry in the list of buds to replay. -+ * @list: next bud in the list -+ * @bud: bud description object -+ * @free: free bytes in the bud -+ * @sqnum: reference node sequence number -+ */ -+struct bud_entry { -+ struct list_head list; -+ struct ubifs_bud *bud; -+ int free; -+ unsigned long long sqnum; -+}; -+ -+/** -+ * set_bud_lprops - set free and dirty space used by a bud. -+ * @c: UBIFS file-system description object -+ * @r: replay entry of bud -+ */ -+static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r) -+{ -+ const struct ubifs_lprops *lp; -+ int err = 0, dirty; -+ -+ ubifs_get_lprops(c); -+ -+ lp = ubifs_lpt_lookup_dirty(c, r->lnum); -+ if (IS_ERR(lp)) { -+ err = PTR_ERR(lp); -+ goto out; -+ } -+ -+ dirty = lp->dirty; -+ if (r->offs == 0 && (lp->free != c->leb_size || lp->dirty != 0)) { -+ /* -+ * The LEB was added to the journal with a starting offset of -+ * zero which means the LEB must have been empty. The LEB -+ * property values should be lp->free == c->leb_size and -+ * lp->dirty == 0, but that is not the case. 
The reason is that -+ * the LEB was garbage collected. The garbage collector resets -+ * the free and dirty space without recording it anywhere except -+ * lprops, so if there is not a commit then lprops does not have -+ * that information next time the file system is mounted. -+ * -+ * We do not need to adjust free space because the scan has told -+ * us the exact value which is recorded in the replay entry as -+ * r->free. -+ * -+ * However we do need to subtract from the dirty space the -+ * amount of space that the garbage collector reclaimed, which -+ * is the whole LEB minus the amount of space that was free. -+ */ -+ dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, -+ lp->free, lp->dirty); -+ dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, -+ lp->free, lp->dirty); -+ dirty -= c->leb_size - lp->free; -+ /* -+ * If the replay order was perfect the dirty space would now be -+ * zero. The order is not perfect because the the journal heads -+ * race with each other. This is not a problem but is does mean -+ * that the dirty space may temporarily exceed c->leb_size -+ * during the replay. -+ */ -+ if (dirty != 0) -+ dbg_msg("LEB %d lp: %d free %d dirty " -+ "replay: %d free %d dirty", r->lnum, lp->free, -+ lp->dirty, r->free, r->dirty); -+ } -+ lp = ubifs_change_lp(c, lp, r->free, dirty + r->dirty, -+ lp->flags | LPROPS_TAKEN, 0); -+ if (IS_ERR(lp)) { -+ err = PTR_ERR(lp); -+ goto out; -+ } -+out: -+ ubifs_release_lprops(c); -+ return err; -+} -+ -+/** -+ * trun_remove_range - apply a replay entry for a truncation to the TNC. 
-+ * @c: UBIFS file-system description object -+ * @r: replay entry of truncation -+ */ -+static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r) -+{ -+ unsigned min_blk, max_blk; -+ union ubifs_key min_key, max_key; -+ ino_t ino; -+ -+ min_blk = r->new_size / UBIFS_BLOCK_SIZE; -+ if (r->new_size & (UBIFS_BLOCK_SIZE - 1)) -+ min_blk += 1; -+ -+ max_blk = r->old_size / UBIFS_BLOCK_SIZE; -+ if ((r->old_size & (UBIFS_BLOCK_SIZE - 1)) == 0) -+ max_blk -= 1; -+ -+ ino = key_inum(c, &r->key); -+ -+ data_key_init(c, &min_key, ino, min_blk); -+ data_key_init(c, &max_key, ino, max_blk); -+ -+ return ubifs_tnc_remove_range(c, &min_key, &max_key); -+} -+ -+/** -+ * apply_replay_entry - apply a replay entry to the TNC. -+ * @c: UBIFS file-system description object -+ * @r: replay entry to apply -+ * -+ * Apply a replay entry to the TNC. -+ */ -+static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) -+{ -+ int err, deletion = ((r->flags & REPLAY_DELETION) != 0); -+ -+ dbg_mnt("LEB %d:%d len %d flgs %d sqnum %llu %s", r->lnum, -+ r->offs, r->len, r->flags, r->sqnum, DBGKEY(&r->key)); -+ -+ /* Set c->replay_sqnum to help deal with dangling branches. 
*/ -+ c->replay_sqnum = r->sqnum; -+ -+ if (r->flags & REPLAY_REF) -+ err = set_bud_lprops(c, r); -+ else if (is_hash_key(c, &r->key)) { -+ if (deletion) -+ err = ubifs_tnc_remove_nm(c, &r->key, &r->nm); -+ else -+ err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs, -+ r->len, &r->nm); -+ } else { -+ if (deletion) -+ switch (key_type(c, &r->key)) { -+ case UBIFS_INO_KEY: -+ { -+ ino_t inum = key_inum(c, &r->key); -+ -+ err = ubifs_tnc_remove_ino(c, inum); -+ break; -+ } -+ case UBIFS_TRUN_KEY: -+ err = trun_remove_range(c, r); -+ break; -+ default: -+ err = ubifs_tnc_remove(c, &r->key); -+ break; -+ } -+ else -+ err = ubifs_tnc_add(c, &r->key, r->lnum, r->offs, -+ r->len); -+ if (err) -+ return err; -+ -+ if (c->need_recovery) -+ err = ubifs_recover_size_accum(c, &r->key, deletion, -+ r->new_size); -+ } -+ -+ return err; -+} -+ -+/** -+ * destroy_replay_tree - destroy the replay. -+ * @c: UBIFS file-system description object -+ * -+ * Destroy the replay tree. -+ */ -+static void destroy_replay_tree(struct ubifs_info *c) -+{ -+ struct rb_node *this = c->replay_tree.rb_node; -+ struct replay_entry *r; -+ -+ while (this) { -+ if (this->rb_left) { -+ this = this->rb_left; -+ continue; -+ } else if (this->rb_right) { -+ this = this->rb_right; -+ continue; -+ } -+ r = rb_entry(this, struct replay_entry, rb); -+ this = rb_parent(this); -+ if (this) { -+ if (this->rb_left == &r->rb) -+ this->rb_left = NULL; -+ else -+ this->rb_right = NULL; -+ } -+ if (is_hash_key(c, &r->key)) -+ kfree(r->nm.name); -+ kfree(r); -+ } -+ c->replay_tree = RB_ROOT; -+} -+ -+/** -+ * apply_replay_tree - apply the replay tree to the TNC. -+ * @c: UBIFS file-system description object -+ * -+ * Apply the replay tree. -+ * Returns zero in case of success and a negative error code in case of -+ * failure. 
-+ */ -+static int apply_replay_tree(struct ubifs_info *c) -+{ -+ struct rb_node *this = rb_first(&c->replay_tree); -+ -+ while (this) { -+ struct replay_entry *r; -+ int err; -+ -+ cond_resched(); -+ -+ r = rb_entry(this, struct replay_entry, rb); -+ err = apply_replay_entry(c, r); -+ if (err) -+ return err; -+ this = rb_next(this); -+ } -+ return 0; -+} -+ -+/** -+ * insert_node - insert a node to the replay tree. -+ * @c: UBIFS file-system description object -+ * @lnum: node logical eraseblock number -+ * @offs: node offset -+ * @len: node length -+ * @key: node key -+ * @sqnum: sequence number -+ * @deletion: non-zero if this is a deletion -+ * @used: number of bytes in use in a LEB -+ * @old_size: truncation old size -+ * @new_size: truncation new size -+ * -+ * This function inserts a scanned non-direntry node to the replay tree. The -+ * replay tree is an RB-tree containing @struct replay_entry elements which are -+ * indexed by the sequence number. The replay tree is applied at the very end -+ * of the replay process. Since the tree is sorted in sequence number order, -+ * the older modifications are applied first. This function returns zero in -+ * case of success and a negative error code in case of failure. 
-+ */ -+static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, -+ union ubifs_key *key, unsigned long long sqnum, -+ int deletion, int *used, loff_t old_size, -+ loff_t new_size) -+{ -+ struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; -+ struct replay_entry *r; -+ -+ if (key_inum(c, key) >= c->highest_inum) -+ c->highest_inum = key_inum(c, key); -+ -+ dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); -+ while (*p) { -+ parent = *p; -+ r = rb_entry(parent, struct replay_entry, rb); -+ if (sqnum < r->sqnum) { -+ p = &(*p)->rb_left; -+ continue; -+ } else if (sqnum > r->sqnum) { -+ p = &(*p)->rb_right; -+ continue; -+ } -+ ubifs_err("duplicate sqnum in replay"); -+ return -EINVAL; -+ } -+ -+ r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); -+ if (!r) -+ return -ENOMEM; -+ -+ if (!deletion) -+ *used += ALIGN(len, 8); -+ r->lnum = lnum; -+ r->offs = offs; -+ r->len = len; -+ r->sqnum = sqnum; -+ r->flags = (deletion ? REPLAY_DELETION : 0); -+ r->old_size = old_size; -+ r->new_size = new_size; -+ key_copy(c, key, &r->key); -+ -+ rb_link_node(&r->rb, parent, p); -+ rb_insert_color(&r->rb, &c->replay_tree); -+ return 0; -+} -+ -+/** -+ * insert_dent - insert a directory entry node into the replay tree. -+ * @c: UBIFS file-system description object -+ * @lnum: node logical eraseblock number -+ * @offs: node offset -+ * @len: node length -+ * @key: node key -+ * @name: directory entry name -+ * @nlen: directory entry name length -+ * @sqnum: sequence number -+ * @deletion: non-zero if this is a deletion -+ * @used: number of bytes in use in a LEB -+ * -+ * This function inserts a scanned directory entry node to the replay tree. -+ * Returns zero in case of success and a negative error code in case of -+ * failure. -+ * -+ * This function is also used for extended attribute entries because they are -+ * implemented as directory entry nodes. 
-+ */ -+static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, -+ union ubifs_key *key, const char *name, int nlen, -+ unsigned long long sqnum, int deletion, int *used) -+{ -+ struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; -+ struct replay_entry *r; -+ char *nbuf; -+ -+ if (key_inum(c, key) >= c->highest_inum) -+ c->highest_inum = key_inum(c, key); -+ -+ dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); -+ while (*p) { -+ parent = *p; -+ r = rb_entry(parent, struct replay_entry, rb); -+ if (sqnum < r->sqnum) { -+ p = &(*p)->rb_left; -+ continue; -+ } -+ if (sqnum > r->sqnum) { -+ p = &(*p)->rb_right; -+ continue; -+ } -+ ubifs_err("duplicate sqnum in replay"); -+ return -EINVAL; -+ } -+ -+ r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); -+ if (!r) -+ return -ENOMEM; -+ nbuf = kmalloc(nlen + 1, GFP_KERNEL); -+ if (!nbuf) { -+ kfree(r); -+ return -ENOMEM; -+ } -+ -+ if (!deletion) -+ *used += ALIGN(len, 8); -+ r->lnum = lnum; -+ r->offs = offs; -+ r->len = len; -+ r->sqnum = sqnum; -+ r->nm.len = nlen; -+ memcpy(nbuf, name, nlen); -+ nbuf[nlen] = '\0'; -+ r->nm.name = nbuf; -+ r->flags = (deletion ? REPLAY_DELETION : 0); -+ key_copy(c, key, &r->key); -+ -+ ubifs_assert(!*p); -+ rb_link_node(&r->rb, parent, p); -+ rb_insert_color(&r->rb, &c->replay_tree); -+ return 0; -+} -+ -+/** -+ * ubifs_validate_entry - validate directory or extended attribute entry node. -+ * @c: UBIFS file-system description object -+ * @dent: the node to validate -+ * -+ * This function validates directory or extended attribute entry node @dent. -+ * Returns zero if the node is all right and a %-EINVAL if not. 
-+ */ -+int ubifs_validate_entry(struct ubifs_info *c, -+ const struct ubifs_dent_node *dent) -+{ -+ int key_type = key_type_flash(c, dent->key); -+ int nlen = le16_to_cpu(dent->nlen); -+ -+ if (le32_to_cpu(dent->ch.len) != nlen + UBIFS_DENT_NODE_SZ + 1 || -+ dent->type >= UBIFS_ITYPES_CNT || -+ nlen > UBIFS_MAX_NLEN || dent->name[nlen] != 0 || -+ strnlen(dent->name, nlen) != nlen || -+ le64_to_cpu(dent->inum) > MAX_INUM) { -+ ubifs_err("bad %s node", key_type == UBIFS_DENT_KEY ? -+ "directory entry" : "extended attribute entry"); -+ return -EINVAL; -+ } -+ -+ if (key_type != UBIFS_DENT_KEY && key_type != UBIFS_XENT_KEY) { -+ ubifs_err("bad key type %d", key_type); -+ return -EINVAL; -+ } -+ -+ return 0; -+} -+ -+/** -+ * replay_bud - replay a bud logical eraseblock. -+ * @c: UBIFS file-system description object -+ * @lnum: bud logical eraseblock number to replay -+ * @offs: bud start offset -+ * @jhead: journal head to which this bud belongs -+ * @free: amount of free space in the bud is returned here -+ * @dirty: amount of dirty space from padding and deletion nodes is returned -+ * here -+ * -+ * This function returns zero in case of success and a negative error code in -+ * case of failure. -+ */ -+static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, -+ int *free, int *dirty) -+{ -+ int err = 0, used = 0; -+ struct ubifs_scan_leb *sleb; -+ struct ubifs_scan_node *snod; -+ struct ubifs_bud *bud; -+ -+ dbg_mnt("replay bud LEB %d, head %d", lnum, jhead); -+ if (c->need_recovery) -+ sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD); -+ else -+ sleb = ubifs_scan(c, lnum, offs, c->sbuf); -+ if (IS_ERR(sleb)) -+ return PTR_ERR(sleb); -+ -+ /* -+ * The bud does not have to start from offset zero - the beginning of -+ * the 'lnum' LEB may contain previously committed data. One of the -+ * things we have to do in replay is to correctly update lprops with -+ * newer information about this LEB. 
-+ * -+ * At this point lprops thinks that this LEB has 'c->leb_size - offs' -+ * bytes of free space because it only contain information about -+ * committed data. -+ * -+ * But we know that real amount of free space is 'c->leb_size - -+ * sleb->endpt', and the space in the 'lnum' LEB between 'offs' and -+ * 'sleb->endpt' is used by bud data. We have to correctly calculate -+ * how much of these data are dirty and update lprops with this -+ * information. -+ * -+ * The dirt in that LEB region is comprised of padding nodes, deletion -+ * nodes, truncation nodes and nodes which are obsoleted by subsequent -+ * nodes in this LEB. So instead of calculating clean space, we -+ * calculate used space ('used' variable). -+ */ -+ -+ list_for_each_entry(snod, &sleb->nodes, list) { -+ int deletion = 0; -+ -+ cond_resched(); -+ -+ if (snod->sqnum >= SQNUM_WATERMARK) { -+ ubifs_err("file system's life ended"); -+ goto out_dump; -+ } -+ -+ if (snod->sqnum > c->max_sqnum) -+ c->max_sqnum = snod->sqnum; -+ -+ switch (snod->type) { -+ case UBIFS_INO_NODE: -+ { -+ struct ubifs_ino_node *ino = snod->node; -+ loff_t new_size = le64_to_cpu(ino->size); -+ -+ if (le32_to_cpu(ino->nlink) == 0) -+ deletion = 1; -+ err = insert_node(c, lnum, snod->offs, snod->len, -+ &snod->key, snod->sqnum, deletion, -+ &used, 0, new_size); -+ break; -+ } -+ case UBIFS_DATA_NODE: -+ { -+ struct ubifs_data_node *dn = snod->node; -+ loff_t new_size = le32_to_cpu(dn->size) + -+ key_block(c, &snod->key) * -+ UBIFS_BLOCK_SIZE; -+ -+ err = insert_node(c, lnum, snod->offs, snod->len, -+ &snod->key, snod->sqnum, deletion, -+ &used, 0, new_size); -+ break; -+ } -+ case UBIFS_DENT_NODE: -+ case UBIFS_XENT_NODE: -+ { -+ struct ubifs_dent_node *dent = snod->node; -+ -+ err = ubifs_validate_entry(c, dent); -+ if (err) -+ goto out_dump; -+ -+ err = insert_dent(c, lnum, snod->offs, snod->len, -+ &snod->key, dent->name, -+ le16_to_cpu(dent->nlen), snod->sqnum, -+ !le64_to_cpu(dent->inum), &used); -+ break; -+ } -+ case 
UBIFS_TRUN_NODE: -+ { -+ struct ubifs_trun_node *trun = snod->node; -+ loff_t old_size = le64_to_cpu(trun->old_size); -+ loff_t new_size = le64_to_cpu(trun->new_size); -+ union ubifs_key key; -+ -+ /* Validate truncation node */ -+ if (old_size < 0 || old_size > c->max_inode_sz || -+ new_size < 0 || new_size > c->max_inode_sz || -+ old_size <= new_size) { -+ ubifs_err("bad truncation node"); -+ goto out_dump; -+ } -+ -+ /* -+ * Create a fake truncation key just to use the same -+ * functions which expect nodes to have keys. -+ */ -+ trun_key_init(c, &key, le32_to_cpu(trun->inum)); -+ err = insert_node(c, lnum, snod->offs, snod->len, -+ &key, snod->sqnum, 1, &used, -+ old_size, new_size); -+ break; -+ } -+ default: -+ ubifs_err("unexpected node type %d in bud LEB %d:%d", -+ snod->type, lnum, snod->offs); -+ err = -EINVAL; -+ goto out_dump; -+ } -+ if (err) -+ goto out; -+ } -+ -+ bud = ubifs_search_bud(c, lnum); -+ if (!bud) -+ BUG(); -+ -+ ubifs_assert(sleb->endpt - offs >= used); -+ ubifs_assert(sleb->endpt % c->min_io_size == 0); -+ -+ if (sleb->endpt + c->min_io_size <= c->leb_size && -+ !(c->vfs_sb->s_flags & MS_RDONLY)) -+ err = ubifs_wbuf_seek_nolock(&c->jheads[jhead].wbuf, lnum, -+ sleb->endpt, UBI_SHORTTERM); -+ -+ *dirty = sleb->endpt - offs - used; -+ *free = c->leb_size - sleb->endpt; -+ -+out: -+ ubifs_scan_destroy(sleb); -+ return err; -+ -+out_dump: -+ ubifs_err("bad node is at LEB %d:%d", lnum, snod->offs); -+ dbg_dump_node(c, snod->node); -+ ubifs_scan_destroy(sleb); -+ return -EINVAL; -+} -+ -+/** -+ * insert_ref_node - insert a reference node to the replay tree. 
-+ * @c: UBIFS file-system description object -+ * @lnum: node logical eraseblock number -+ * @offs: node offset -+ * @sqnum: sequence number -+ * @free: amount of free space in bud -+ * @dirty: amount of dirty space from padding and deletion nodes -+ * -+ * This function inserts a reference node to the replay tree and returns zero -+ * in case of success or a negative error code in case of failure. -+ */ -+static int insert_ref_node(struct ubifs_info *c, int lnum, int offs, -+ unsigned long long sqnum, int free, int dirty) -+{ -+ struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; -+ struct replay_entry *r; -+ -+ dbg_mnt("add ref LEB %d:%d", lnum, offs); -+ while (*p) { -+ parent = *p; -+ r = rb_entry(parent, struct replay_entry, rb); -+ if (sqnum < r->sqnum) { -+ p = &(*p)->rb_left; -+ continue; -+ } else if (sqnum > r->sqnum) { -+ p = &(*p)->rb_right; -+ continue; -+ } -+ ubifs_err("duplicate sqnum in replay tree"); -+ return -EINVAL; -+ } -+ -+ r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); -+ if (!r) -+ return -ENOMEM; -+ -+ r->lnum = lnum; -+ r->offs = offs; -+ r->sqnum = sqnum; -+ r->flags = REPLAY_REF; -+ r->free = free; -+ r->dirty = dirty; -+ -+ rb_link_node(&r->rb, parent, p); -+ rb_insert_color(&r->rb, &c->replay_tree); -+ return 0; -+} -+ -+/** -+ * replay_buds - replay all buds. -+ * @c: UBIFS file-system description object -+ * -+ * This function returns zero in case of success and a negative error code in -+ * case of failure. -+ */ -+static int replay_buds(struct ubifs_info *c) -+{ -+ struct bud_entry *b; -+ int err, uninitialized_var(free), uninitialized_var(dirty); -+ -+ list_for_each_entry(b, &c->replay_buds, list) { -+ err = replay_bud(c, b->bud->lnum, b->bud->start, b->bud->jhead, -+ &free, &dirty); -+ if (err) -+ return err; -+ err = insert_ref_node(c, b->bud->lnum, b->bud->start, b->sqnum, -+ free, dirty); -+ if (err) -+ return err; -+ } -+ -+ return 0; -+} -+ -+/** -+ * destroy_bud_list - destroy the list of buds to replay. 
-+ * @c: UBIFS file-system description object -+ */ -+static void destroy_bud_list(struct ubifs_info *c) -+{ -+ struct bud_entry *b; -+ -+ while (!list_empty(&c->replay_buds)) { -+ b = list_entry(c->replay_buds.next, struct bud_entry, list); -+ list_del(&b->list); -+ kfree(b); -+ } -+} -+ -+/** -+ * add_replay_bud - add a bud to the list of buds to replay. -+ * @c: UBIFS file-system description object -+ * @lnum: bud logical eraseblock number to replay -+ * @offs: bud start offset -+ * @jhead: journal head to which this bud belongs -+ * @sqnum: reference node sequence number -+ * -+ * This function returns zero in case of success and a negative error code in -+ * case of failure. -+ */ -+static int add_replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, -+ unsigned long long sqnum) -+{ -+ struct ubifs_bud *bud; -+ struct bud_entry *b; -+ -+ dbg_mnt("add replay bud LEB %d:%d, head %d", lnum, offs, jhead); -+ -+ bud = kmalloc(sizeof(struct ubifs_bud), GFP_KERNEL); -+ if (!bud) -+ return -ENOMEM; -+ -+ b = kmalloc(sizeof(struct bud_entry), GFP_KERNEL); -+ if (!b) { -+ kfree(bud); -+ return -ENOMEM; -+ } -+ -+ bud->lnum = lnum; -+ bud->start = offs; -+ bud->jhead = jhead; -+ ubifs_add_bud(c, bud); -+ -+ b->bud = bud; -+ b->sqnum = sqnum; -+ list_add_tail(&b->list, &c->replay_buds); -+ -+ return 0; -+} -+ -+/** -+ * validate_ref - validate a reference node. -+ * @c: UBIFS file-system description object -+ * @ref: the reference node to validate -+ * @ref_lnum: LEB number of the reference node -+ * @ref_offs: reference node offset -+ * -+ * This function returns %1 if a bud reference already exists for the LEB. %0 is -+ * returned if the reference node is new, otherwise %-EINVAL is returned if -+ * validation failed. 
-+ */ -+static int validate_ref(struct ubifs_info *c, const struct ubifs_ref_node *ref) -+{ -+ struct ubifs_bud *bud; -+ int lnum = le32_to_cpu(ref->lnum); -+ unsigned int offs = le32_to_cpu(ref->offs); -+ unsigned int jhead = le32_to_cpu(ref->jhead); -+ -+ /* -+ * ref->offs may point to the end of LEB when the journal head points -+ * to the end of LEB and we write reference node for it during commit. -+ * So this is why we require 'offs > c->leb_size'. -+ */ -+ if (jhead >= c->jhead_cnt || lnum >= c->leb_cnt || -+ lnum < c->main_first || offs > c->leb_size || -+ offs & (c->min_io_size - 1)) -+ return -EINVAL; -+ -+ /* Make sure we have not already looked at this bud */ -+ bud = ubifs_search_bud(c, lnum); -+ if (bud) { -+ if (bud->jhead == jhead && bud->start <= offs) -+ return 1; -+ ubifs_err("bud at LEB %d:%d was already referred", lnum, offs); -+ return -EINVAL; -+ } -+ -+ return 0; -+} -+ -+/** -+ * replay_log_leb - replay a log logical eraseblock. -+ * @c: UBIFS file-system description object -+ * @lnum: log logical eraseblock to replay -+ * @offs: offset to start replaying from -+ * @sbuf: scan buffer -+ * -+ * This function replays a log LEB and returns zero in case of success, %1 if -+ * this is the last LEB in the log, and a negative error code in case of -+ * failure. 
-+ */ -+static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf) -+{ -+ int err; -+ struct ubifs_scan_leb *sleb; -+ struct ubifs_scan_node *snod; -+ const struct ubifs_cs_node *node; -+ -+ dbg_mnt("replay log LEB %d:%d", lnum, offs); -+ sleb = ubifs_scan(c, lnum, offs, sbuf); -+ if (IS_ERR(sleb)) { -+ if (c->need_recovery) -+ sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf); -+ if (IS_ERR(sleb)) -+ return PTR_ERR(sleb); -+ } -+ -+ if (sleb->nodes_cnt == 0) { -+ err = 1; -+ goto out; -+ } -+ -+ node = sleb->buf; -+ -+ snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list); -+ if (c->cs_sqnum == 0) { -+ /* -+ * This is the first log LEB we are looking at, make sure that -+ * the first node is a commit start node. Also record its -+ * sequence number so that UBIFS can determine where the log -+ * ends, because all nodes which were have higher sequence -+ * numbers. -+ */ -+ if (snod->type != UBIFS_CS_NODE) { -+ dbg_err("first log node at LEB %d:%d is not CS node", -+ lnum, offs); -+ goto out_dump; -+ } -+ if (le64_to_cpu(node->cmt_no) != c->cmt_no) { -+ dbg_err("first CS node at LEB %d:%d has wrong " -+ "commit number %llu expected %llu", -+ lnum, offs, -+ (unsigned long long)le64_to_cpu(node->cmt_no), -+ c->cmt_no); -+ goto out_dump; -+ } -+ -+ c->cs_sqnum = le64_to_cpu(node->ch.sqnum); -+ dbg_mnt("commit start sqnum %llu", c->cs_sqnum); -+ } -+ -+ if (snod->sqnum < c->cs_sqnum) { -+ /* -+ * This means that we reached end of log and now -+ * look to the older log data, which was already -+ * committed but the eraseblock was not erased (UBIFS -+ * only un-maps it). So this basically means we have to -+ * exit with "end of log" code. 
-+ */ -+ err = 1; -+ goto out; -+ } -+ -+ /* Make sure the first node sits at offset zero of the LEB */ -+ if (snod->offs != 0) { -+ dbg_err("first node is not at zero offset"); -+ goto out_dump; -+ } -+ -+ list_for_each_entry(snod, &sleb->nodes, list) { -+ -+ cond_resched(); -+ -+ if (snod->sqnum >= SQNUM_WATERMARK) { -+ ubifs_err("file system's life ended"); -+ goto out_dump; -+ } -+ -+ if (snod->sqnum < c->cs_sqnum) { -+ dbg_err("bad sqnum %llu, commit sqnum %llu", -+ snod->sqnum, c->cs_sqnum); -+ goto out_dump; -+ } -+ -+ if (snod->sqnum > c->max_sqnum) -+ c->max_sqnum = snod->sqnum; -+ -+ switch (snod->type) { -+ case UBIFS_REF_NODE: { -+ const struct ubifs_ref_node *ref = snod->node; -+ -+ err = validate_ref(c, ref); -+ if (err == 1) -+ break; /* Already have this bud */ -+ if (err) -+ goto out_dump; -+ -+ err = add_replay_bud(c, le32_to_cpu(ref->lnum), -+ le32_to_cpu(ref->offs), -+ le32_to_cpu(ref->jhead), -+ snod->sqnum); -+ if (err) -+ goto out; -+ -+ break; -+ } -+ case UBIFS_CS_NODE: -+ /* Make sure it sits at the beginning of LEB */ -+ if (snod->offs != 0) { -+ ubifs_err("unexpected node in log"); -+ goto out_dump; -+ } -+ break; -+ default: -+ ubifs_err("unexpected node in log"); -+ goto out_dump; -+ } -+ } -+ -+ if (sleb->endpt || c->lhead_offs >= c->leb_size) { -+ c->lhead_lnum = lnum; -+ c->lhead_offs = sleb->endpt; -+ } -+ -+ err = !sleb->endpt; -+out: -+ ubifs_scan_destroy(sleb); -+ return err; -+ -+out_dump: -+ ubifs_err("log error detected while replying the log at LEB %d:%d", -+ lnum, offs + snod->offs); -+ dbg_dump_node(c, snod->node); -+ ubifs_scan_destroy(sleb); -+ return -EINVAL; -+} -+ -+/** -+ * take_ihead - update the status of the index head in lprops to 'taken'. -+ * @c: UBIFS file-system description object -+ * -+ * This function returns the amount of free space in the index head LEB or a -+ * negative error code. 
-+ */ -+static int take_ihead(struct ubifs_info *c) -+{ -+ const struct ubifs_lprops *lp; -+ int err, free; -+ -+ ubifs_get_lprops(c); -+ -+ lp = ubifs_lpt_lookup_dirty(c, c->ihead_lnum); -+ if (IS_ERR(lp)) { -+ err = PTR_ERR(lp); -+ goto out; -+ } -+ -+ free = lp->free; -+ -+ lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC, -+ lp->flags | LPROPS_TAKEN, 0); -+ if (IS_ERR(lp)) { -+ err = PTR_ERR(lp); -+ goto out; -+ } -+ -+ err = free; -+out: -+ ubifs_release_lprops(c); -+ return err; -+} -+ -+/** -+ * ubifs_replay_journal - replay journal. -+ * @c: UBIFS file-system description object -+ * -+ * This function scans the journal, replays and cleans it up. It makes sure all -+ * memory data structures related to uncommitted journal are built (dirty TNC -+ * tree, tree of buds, modified lprops, etc). -+ */ -+int ubifs_replay_journal(struct ubifs_info *c) -+{ -+ int err, i, lnum, offs, free; -+ void *sbuf = NULL; -+ -+ BUILD_BUG_ON(UBIFS_TRUN_KEY > 5); -+ -+ /* Update the status of the index head in lprops to 'taken' */ -+ free = take_ihead(c); -+ if (free < 0) -+ return free; /* Error code */ -+ -+ if (c->ihead_offs != c->leb_size - free) { -+ ubifs_err("bad index head LEB %d:%d", c->ihead_lnum, -+ c->ihead_offs); -+ return -EINVAL; -+ } -+ -+ sbuf = vmalloc(c->leb_size); -+ if (!sbuf) -+ return -ENOMEM; -+ -+ dbg_mnt("start replaying the journal"); -+ -+ c->replaying = 1; -+ -+ lnum = c->ltail_lnum = c->lhead_lnum; -+ offs = c->lhead_offs; -+ -+ for (i = 0; i < c->log_lebs; i++, lnum++) { -+ if (lnum >= UBIFS_LOG_LNUM + c->log_lebs) { -+ /* -+ * The log is logically circular, we reached the last -+ * LEB, switch to the first one. 
-+ */ -+ lnum = UBIFS_LOG_LNUM; -+ offs = 0; -+ } -+ err = replay_log_leb(c, lnum, offs, sbuf); -+ if (err == 1) -+ /* We hit the end of the log */ -+ break; -+ if (err) -+ goto out; -+ offs = 0; -+ } -+ -+ err = replay_buds(c); -+ if (err) -+ goto out; -+ -+ err = apply_replay_tree(c); -+ if (err) -+ goto out; -+ -+ /* -+ * UBIFS budgeting calculations use @c->budg_uncommitted_idx variable -+ * to roughly estimate index growth. Things like @c->min_idx_lebs -+ * depend on it. This means we have to initialize it to make sure -+ * budgeting works properly. -+ */ -+ c->budg_uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt); -+ c->budg_uncommitted_idx *= c->max_idx_node_sz; -+ -+ ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery); -+ dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, " -+ "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum, -+ (unsigned long)c->highest_inum); -+out: -+ destroy_replay_tree(c); -+ destroy_bud_list(c); -+ vfree(sbuf); -+ c->replaying = 0; -+ return err; -+} -diff -Nurd linux-2.6.24.orig/fs/ubifs/sb.c linux-2.6.24/fs/ubifs/sb.c ---- linux-2.6.24.orig/fs/ubifs/sb.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.24/fs/ubifs/sb.c 2009-04-17 09:49:28.000000000 +0200 -@@ -0,0 +1,634 @@ -+/* -+ * This file is part of UBIFS. -+ * -+ * Copyright (C) 2006-2008 Nokia Corporation. -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 as published by -+ * the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -+ * more details. 
-+ * -+ * You should have received a copy of the GNU General Public License along with -+ * this program; if not, write to the Free Software Foundation, Inc., 51 -+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ * -+ * Authors: Artem Bityutskiy (Битюцкий Артём) -+ * Adrian Hunter -+ */ -+ -+/* -+ * This file implements UBIFS superblock. The superblock is stored at the first -+ * LEB of the volume and is never changed by UBIFS. Only user-space tools may -+ * change it. The superblock node mostly contains geometry information. -+ */ -+ -+#include "ubifs.h" -+#include <linux/random.h> -+ -+/* -+ * Default journal size in logical eraseblocks as a percent of total -+ * flash size. -+ */ -+#define DEFAULT_JNL_PERCENT 5 -+ -+/* Default maximum journal size in bytes */ -+#define DEFAULT_MAX_JNL (32*1024*1024) -+ -+/* Default indexing tree fanout */ -+#define DEFAULT_FANOUT 8 -+ -+/* Default number of data journal heads */ -+#define DEFAULT_JHEADS_CNT 1 -+ -+/* Default positions of different LEBs in the main area */ -+#define DEFAULT_IDX_LEB 0 -+#define DEFAULT_DATA_LEB 1 -+#define DEFAULT_GC_LEB 2 -+ -+/* Default number of LEB numbers in LPT's save table */ -+#define DEFAULT_LSAVE_CNT 256 -+ -+/* Default reserved pool size as a percent of maximum free space */ -+#define DEFAULT_RP_PERCENT 5 -+ -+/* The default maximum size of reserved pool in bytes */ -+#define DEFAULT_MAX_RP_SIZE (5*1024*1024) -+ -+/* Default UBIFS compressor */ -+#define DEFAULT_COMPRESSOR UBIFS_COMPR_LZO -+ -+/* Default time granularity in nanoseconds */ -+#define DEFAULT_TIME_GRAN 1000000000 -+ -+/** -+ * create_default_filesystem - format empty UBI volume. -+ * @c: UBIFS file-system description object -+ * -+ * This function creates default empty file-system. Returns zero in case of -+ * success and a negative error code in case of failure. 
-+ */ -+static int create_default_filesystem(struct ubifs_info *c) -+{ -+ struct ubifs_sb_node *sup; -+ struct ubifs_mst_node *mst; -+ struct ubifs_idx_node *idx; -+ struct ubifs_branch *br; -+ struct ubifs_ino_node *ino; -+ struct ubifs_cs_node *cs; -+ union ubifs_key key; -+ int err, tmp, jnl_lebs, log_lebs, max_buds, main_lebs, main_first; -+ int lpt_lebs, lpt_first, orph_lebs, big_lpt, ino_waste, sup_flags = 0; -+ int min_leb_cnt = UBIFS_MIN_LEB_CNT; -+ long long tmp64, main_bytes; -+ __le64 tmp_le64; -+ -+ /* Some functions called from here depend on the @c->key_len filed */ -+ c->key_len = UBIFS_SK_LEN; -+ -+ /* -+ * First of all, we have to calculate default file-system geometry - -+ * log size, journal size, etc. -+ */ -+ if (c->leb_cnt < 0x7FFFFFFF / DEFAULT_JNL_PERCENT) -+ /* We can first multiply then divide and have no overflow */ -+ jnl_lebs = c->leb_cnt * DEFAULT_JNL_PERCENT / 100; -+ else -+ jnl_lebs = (c->leb_cnt / 100) * DEFAULT_JNL_PERCENT; -+ -+ if (jnl_lebs < UBIFS_MIN_JNL_LEBS) -+ jnl_lebs = UBIFS_MIN_JNL_LEBS; -+ if (jnl_lebs * c->leb_size > DEFAULT_MAX_JNL) -+ jnl_lebs = DEFAULT_MAX_JNL / c->leb_size; -+ -+ /* -+ * The log should be large enough to fit reference nodes for all bud -+ * LEBs. Because buds do not have to start from the beginning of LEBs -+ * (half of the LEB may contain committed data), the log should -+ * generally be larger, make it twice as large. -+ */ -+ tmp = 2 * (c->ref_node_alsz * jnl_lebs) + c->leb_size - 1; -+ log_lebs = tmp / c->leb_size; -+ /* Plus one LEB reserved for commit */ -+ log_lebs += 1; -+ if (c->leb_cnt - min_leb_cnt > 8) { -+ /* And some extra space to allow writes while committing */ -+ log_lebs += 1; -+ min_leb_cnt += 1; -+ } -+ -+ max_buds = jnl_lebs - log_lebs; -+ if (max_buds < UBIFS_MIN_BUD_LEBS) -+ max_buds = UBIFS_MIN_BUD_LEBS; -+ -+ /* -+ * Orphan nodes are stored in a separate area. 
One node can store a lot -+ * of orphan inode numbers, but when new orphan comes we just add a new -+ * orphan node. At some point the nodes are consolidated into one -+ * orphan node. -+ */ -+ orph_lebs = UBIFS_MIN_ORPH_LEBS; -+#ifdef CONFIG_UBIFS_FS_DEBUG -+ if (c->leb_cnt - min_leb_cnt > 1) -+ /* -+ * For debugging purposes it is better to have at least 2 -+ * orphan LEBs, because the orphan subsystem would need to do -+ * consolidations and would be stressed more. -+ */ -+ orph_lebs += 1; -+#endif -+ -+ main_lebs = c->leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS - log_lebs; -+ main_lebs -= orph_lebs; -+ -+ lpt_first = UBIFS_LOG_LNUM + log_lebs; -+ c->lsave_cnt = DEFAULT_LSAVE_CNT; -+ c->max_leb_cnt = c->leb_cnt; -+ err = ubifs_create_dflt_lpt(c, &main_lebs, lpt_first, &lpt_lebs, -+ &big_lpt); -+ if (err) -+ return err; -+ -+ dbg_gen("LEB Properties Tree created (LEBs %d-%d)", lpt_first, -+ lpt_first + lpt_lebs - 1); -+ -+ main_first = c->leb_cnt - main_lebs; -+ -+ /* Create default superblock */ -+ tmp = ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size); -+ sup = kzalloc(tmp, GFP_KERNEL); -+ if (!sup) -+ return -ENOMEM; -+ -+ tmp64 = (long long)max_buds * c->leb_size; -+ if (big_lpt) -+ sup_flags |= UBIFS_FLG_BIGLPT; -+ -+ sup->ch.node_type = UBIFS_SB_NODE; -+ sup->key_hash = UBIFS_KEY_HASH_R5; -+ sup->flags = cpu_to_le32(sup_flags); -+ sup->min_io_size = cpu_to_le32(c->min_io_size); -+ sup->leb_size = cpu_to_le32(c->leb_size); -+ sup->leb_cnt = cpu_to_le32(c->leb_cnt); -+ sup->max_leb_cnt = cpu_to_le32(c->max_leb_cnt); -+ sup->max_bud_bytes = cpu_to_le64(tmp64); -+ sup->log_lebs = cpu_to_le32(log_lebs); -+ sup->lpt_lebs = cpu_to_le32(lpt_lebs); -+ sup->orph_lebs = cpu_to_le32(orph_lebs); -+ sup->jhead_cnt = cpu_to_le32(DEFAULT_JHEADS_CNT); -+ sup->fanout = cpu_to_le32(DEFAULT_FANOUT); -+ sup->lsave_cnt = cpu_to_le32(c->lsave_cnt); -+ sup->fmt_version = cpu_to_le32(UBIFS_FORMAT_VERSION); -+ sup->time_gran = cpu_to_le32(DEFAULT_TIME_GRAN); -+ if (c->mount_opts.override_compr) 
-+ sup->default_compr = cpu_to_le16(c->mount_opts.compr_type); -+ else -+ sup->default_compr = cpu_to_le16(UBIFS_COMPR_LZO); -+ -+ generate_random_uuid(sup->uuid); -+ -+ main_bytes = (long long)main_lebs * c->leb_size; -+ tmp64 = div_u64(main_bytes * DEFAULT_RP_PERCENT, 100); -+ if (tmp64 > DEFAULT_MAX_RP_SIZE) -+ tmp64 = DEFAULT_MAX_RP_SIZE; -+ sup->rp_size = cpu_to_le64(tmp64); -+ -+ err = ubifs_write_node(c, sup, UBIFS_SB_NODE_SZ, 0, 0, UBI_LONGTERM); -+ kfree(sup); -+ if (err) -+ return err; -+ -+ dbg_gen("default superblock created at LEB 0:0"); -+ -+ /* Create default master node */ -+ mst = kzalloc(c->mst_node_alsz, GFP_KERNEL); -+ if (!mst) -+ return -ENOMEM; -+ -+ mst->ch.node_type = UBIFS_MST_NODE; -+ mst->log_lnum = cpu_to_le32(UBIFS_LOG_LNUM); -+ mst->highest_inum = cpu_to_le64(UBIFS_FIRST_INO); -+ mst->cmt_no = 0; -+ mst->root_lnum = cpu_to_le32(main_first + DEFAULT_IDX_LEB); -+ mst->root_offs = 0; -+ tmp = ubifs_idx_node_sz(c, 1); -+ mst->root_len = cpu_to_le32(tmp); -+ mst->gc_lnum = cpu_to_le32(main_first + DEFAULT_GC_LEB); -+ mst->ihead_lnum = cpu_to_le32(main_first + DEFAULT_IDX_LEB); -+ mst->ihead_offs = cpu_to_le32(ALIGN(tmp, c->min_io_size)); -+ mst->index_size = cpu_to_le64(ALIGN(tmp, 8)); -+ mst->lpt_lnum = cpu_to_le32(c->lpt_lnum); -+ mst->lpt_offs = cpu_to_le32(c->lpt_offs); -+ mst->nhead_lnum = cpu_to_le32(c->nhead_lnum); -+ mst->nhead_offs = cpu_to_le32(c->nhead_offs); -+ mst->ltab_lnum = cpu_to_le32(c->ltab_lnum); -+ mst->ltab_offs = cpu_to_le32(c->ltab_offs); -+ mst->lsave_lnum = cpu_to_le32(c->lsave_lnum); -+ mst->lsave_offs = cpu_to_le32(c->lsave_offs); -+ mst->lscan_lnum = cpu_to_le32(main_first); -+ mst->empty_lebs = cpu_to_le32(main_lebs - 2); -+ mst->idx_lebs = cpu_to_le32(1); -+ mst->leb_cnt = cpu_to_le32(c->leb_cnt); -+ -+ /* Calculate lprops statistics */ -+ tmp64 = main_bytes; -+ tmp64 -= ALIGN(ubifs_idx_node_sz(c, 1), c->min_io_size); -+ tmp64 -= ALIGN(UBIFS_INO_NODE_SZ, c->min_io_size); -+ mst->total_free = 
cpu_to_le64(tmp64); -+ -+ tmp64 = ALIGN(ubifs_idx_node_sz(c, 1), c->min_io_size); -+ ino_waste = ALIGN(UBIFS_INO_NODE_SZ, c->min_io_size) - -+ UBIFS_INO_NODE_SZ; -+ tmp64 += ino_waste; -+ tmp64 -= ALIGN(ubifs_idx_node_sz(c, 1), 8); -+ mst->total_dirty = cpu_to_le64(tmp64); -+ -+ /* The indexing LEB does not contribute to dark space */ -+ tmp64 = (c->main_lebs - 1) * c->dark_wm; -+ mst->total_dark = cpu_to_le64(tmp64); -+ -+ mst->total_used = cpu_to_le64(UBIFS_INO_NODE_SZ); -+ -+ err = ubifs_write_node(c, mst, UBIFS_MST_NODE_SZ, UBIFS_MST_LNUM, 0, -+ UBI_UNKNOWN); -+ if (err) { -+ kfree(mst); -+ return err; -+ } -+ err = ubifs_write_node(c, mst, UBIFS_MST_NODE_SZ, UBIFS_MST_LNUM + 1, 0, -+ UBI_UNKNOWN); -+ kfree(mst); -+ if (err) -+ return err; -+ -+ dbg_gen("default master node created at LEB %d:0", UBIFS_MST_LNUM); -+ -+ /* Create the root indexing node */ -+ tmp = ubifs_idx_node_sz(c, 1); -+ idx = kzalloc(ALIGN(tmp, c->min_io_size), GFP_KERNEL); -+ if (!idx) -+ return -ENOMEM; -+ -+ c->key_fmt = UBIFS_SIMPLE_KEY_FMT; -+ c->key_hash = key_r5_hash; -+ -+ idx->ch.node_type = UBIFS_IDX_NODE; -+ idx->child_cnt = cpu_to_le16(1); -+ ino_key_init(c, &key, UBIFS_ROOT_INO); -+ br = ubifs_idx_branch(c, idx, 0); -+ key_write_idx(c, &key, &br->key); -+ br->lnum = cpu_to_le32(main_first + DEFAULT_DATA_LEB); -+ br->len = cpu_to_le32(UBIFS_INO_NODE_SZ); -+ err = ubifs_write_node(c, idx, tmp, main_first + DEFAULT_IDX_LEB, 0, -+ UBI_UNKNOWN); -+ kfree(idx); -+ if (err) -+ return err; -+ -+ dbg_gen("default root indexing node created LEB %d:0", -+ main_first + DEFAULT_IDX_LEB); -+ -+ /* Create default root inode */ -+ tmp = ALIGN(UBIFS_INO_NODE_SZ, c->min_io_size); -+ ino = kzalloc(tmp, GFP_KERNEL); -+ if (!ino) -+ return -ENOMEM; -+ -+ ino_key_init_flash(c, &ino->key, UBIFS_ROOT_INO); -+ ino->ch.node_type = UBIFS_INO_NODE; -+ ino->creat_sqnum = cpu_to_le64(++c->max_sqnum); -+ ino->nlink = cpu_to_le32(2); -+ tmp_le64 = cpu_to_le64(CURRENT_TIME_SEC.tv_sec); -+ ino->atime_sec = 
tmp_le64; -+ ino->ctime_sec = tmp_le64; -+ ino->mtime_sec = tmp_le64; -+ ino->atime_nsec = 0; -+ ino->ctime_nsec = 0; -+ ino->mtime_nsec = 0; -+ ino->mode = cpu_to_le32(S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO); -+ ino->size = cpu_to_le64(UBIFS_INO_NODE_SZ); -+ -+ /* Set compression enabled by default */ -+ ino->flags = cpu_to_le32(UBIFS_COMPR_FL); -+ -+ err = ubifs_write_node(c, ino, UBIFS_INO_NODE_SZ, -+ main_first + DEFAULT_DATA_LEB, 0, -+ UBI_UNKNOWN); -+ kfree(ino); -+ if (err) -+ return err; -+ -+ dbg_gen("root inode created at LEB %d:0", -+ main_first + DEFAULT_DATA_LEB); -+ -+ /* -+ * The first node in the log has to be the commit start node. This is -+ * always the case during normal file-system operation. Write a fake -+ * commit start node to the log. -+ */ -+ tmp = ALIGN(UBIFS_CS_NODE_SZ, c->min_io_size); -+ cs = kzalloc(tmp, GFP_KERNEL); -+ if (!cs) -+ return -ENOMEM; -+ -+ cs->ch.node_type = UBIFS_CS_NODE; -+ err = ubifs_write_node(c, cs, UBIFS_CS_NODE_SZ, UBIFS_LOG_LNUM, -+ 0, UBI_UNKNOWN); -+ kfree(cs); -+ -+ ubifs_msg("default file-system created"); -+ return 0; -+} -+ -+/** -+ * validate_sb - validate superblock node. -+ * @c: UBIFS file-system description object -+ * @sup: superblock node -+ * -+ * This function validates superblock node @sup. Since most of data was read -+ * from the superblock and stored in @c, the function validates fields in @c -+ * instead. Returns zero in case of success and %-EINVAL in case of validation -+ * failure. -+ */ -+static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup) -+{ -+ long long max_bytes; -+ int err = 1, min_leb_cnt; -+ -+ if (!c->key_hash) { -+ err = 2; -+ goto failed; -+ } -+ -+ if (sup->key_fmt != UBIFS_SIMPLE_KEY_FMT) { -+ err = 3; -+ goto failed; -+ } -+ -+ if (le32_to_cpu(sup->min_io_size) != c->min_io_size) { -+ ubifs_err("min. 
I/O unit mismatch: %d in superblock, %d real", -+ le32_to_cpu(sup->min_io_size), c->min_io_size); -+ goto failed; -+ } -+ -+ if (le32_to_cpu(sup->leb_size) != c->leb_size) { -+ ubifs_err("LEB size mismatch: %d in superblock, %d real", -+ le32_to_cpu(sup->leb_size), c->leb_size); -+ goto failed; -+ } -+ -+ if (c->log_lebs < UBIFS_MIN_LOG_LEBS || -+ c->lpt_lebs < UBIFS_MIN_LPT_LEBS || -+ c->orph_lebs < UBIFS_MIN_ORPH_LEBS || -+ c->main_lebs < UBIFS_MIN_MAIN_LEBS) { -+ err = 4; -+ goto failed; -+ } -+ -+ /* -+ * Calculate minimum allowed amount of main area LEBs. This is very -+ * similar to %UBIFS_MIN_LEB_CNT, but we take into account real what we -+ * have just read from the superblock. -+ */ -+ min_leb_cnt = UBIFS_SB_LEBS + UBIFS_MST_LEBS + c->log_lebs; -+ min_leb_cnt += c->lpt_lebs + c->orph_lebs + c->jhead_cnt + 6; -+ -+ if (c->leb_cnt < min_leb_cnt || c->leb_cnt > c->vi.size) { -+ ubifs_err("bad LEB count: %d in superblock, %d on UBI volume, " -+ "%d minimum required", c->leb_cnt, c->vi.size, -+ min_leb_cnt); -+ goto failed; -+ } -+ -+ if (c->max_leb_cnt < c->leb_cnt) { -+ ubifs_err("max. 
LEB count %d less than LEB count %d", -+ c->max_leb_cnt, c->leb_cnt); -+ goto failed; -+ } -+ -+ if (c->main_lebs < UBIFS_MIN_MAIN_LEBS) { -+ err = 7; -+ goto failed; -+ } -+ -+ if (c->max_bud_bytes < (long long)c->leb_size * UBIFS_MIN_BUD_LEBS || -+ c->max_bud_bytes > (long long)c->leb_size * c->main_lebs) { -+ err = 8; -+ goto failed; -+ } -+ -+ if (c->jhead_cnt < NONDATA_JHEADS_CNT + 1 || -+ c->jhead_cnt > NONDATA_JHEADS_CNT + UBIFS_MAX_JHEADS) { -+ err = 9; -+ goto failed; -+ } -+ -+ if (c->fanout < UBIFS_MIN_FANOUT || -+ ubifs_idx_node_sz(c, c->fanout) > c->leb_size) { -+ err = 10; -+ goto failed; -+ } -+ -+ if (c->lsave_cnt < 0 || (c->lsave_cnt > DEFAULT_LSAVE_CNT && -+ c->lsave_cnt > c->max_leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS - -+ c->log_lebs - c->lpt_lebs - c->orph_lebs)) { -+ err = 11; -+ goto failed; -+ } -+ -+ if (UBIFS_SB_LEBS + UBIFS_MST_LEBS + c->log_lebs + c->lpt_lebs + -+ c->orph_lebs + c->main_lebs != c->leb_cnt) { -+ err = 12; -+ goto failed; -+ } -+ -+ if (c->default_compr < 0 || c->default_compr >= UBIFS_COMPR_TYPES_CNT) { -+ err = 13; -+ goto failed; -+ } -+ -+ max_bytes = c->main_lebs * (long long)c->leb_size; -+ if (c->rp_size < 0 || max_bytes < c->rp_size) { -+ err = 14; -+ goto failed; -+ } -+ -+ if (le32_to_cpu(sup->time_gran) > 1000000000 || -+ le32_to_cpu(sup->time_gran) < 1) { -+ err = 15; -+ goto failed; -+ } -+ -+ return 0; -+ -+failed: -+ ubifs_err("bad superblock, error %d", err); -+ dbg_dump_node(c, sup); -+ return -EINVAL; -+} -+ -+/** -+ * ubifs_read_sb_node - read superblock node. -+ * @c: UBIFS file-system description object -+ * -+ * This function returns a pointer to the superblock node or a negative error -+ * code. 
-+ */ -+struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c) -+{ -+ struct ubifs_sb_node *sup; -+ int err; -+ -+ sup = kmalloc(ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size), GFP_NOFS); -+ if (!sup) -+ return ERR_PTR(-ENOMEM); -+ -+ err = ubifs_read_node(c, sup, UBIFS_SB_NODE, UBIFS_SB_NODE_SZ, -+ UBIFS_SB_LNUM, 0); -+ if (err) { -+ kfree(sup); -+ return ERR_PTR(err); -+ } -+ -+ return sup; -+} -+ -+/** -+ * ubifs_write_sb_node - write superblock node. -+ * @c: UBIFS file-system description object -+ * @sup: superblock node read with 'ubifs_read_sb_node()' -+ * -+ * This function returns %0 on success and a negative error code on failure. -+ */ -+int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup) -+{ -+ int len = ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size); -+ -+ ubifs_prepare_node(c, sup, UBIFS_SB_NODE_SZ, 1); -+ return ubifs_leb_change(c, UBIFS_SB_LNUM, sup, len, UBI_LONGTERM); -+} -+ -+/** -+ * ubifs_read_superblock - read superblock. -+ * @c: UBIFS file-system description object -+ * -+ * This function finds, reads and checks the superblock. If an empty UBI volume -+ * is being mounted, this function creates default superblock. Returns zero in -+ * case of success, and a negative error code in case of failure. -+ */ -+int ubifs_read_superblock(struct ubifs_info *c) -+{ -+ int err, sup_flags; -+ struct ubifs_sb_node *sup; -+ -+ if (c->empty) { -+ err = create_default_filesystem(c); -+ if (err) -+ return err; -+ } -+ -+ sup = ubifs_read_sb_node(c); -+ if (IS_ERR(sup)) -+ return PTR_ERR(sup); -+ -+ /* -+ * The software supports all previous versions but not future versions, -+ * due to the unavailability of time-travelling equipment. 
-+ */ -+ c->fmt_version = le32_to_cpu(sup->fmt_version); -+ if (c->fmt_version > UBIFS_FORMAT_VERSION) { -+ ubifs_err("on-flash format version is %d, but software only " -+ "supports up to version %d", c->fmt_version, -+ UBIFS_FORMAT_VERSION); -+ err = -EINVAL; -+ goto out; -+ } -+ -+ if (c->fmt_version < 3) { -+ ubifs_err("on-flash format version %d is not supported", -+ c->fmt_version); -+ err = -EINVAL; -+ goto out; -+ } -+ -+ switch (sup->key_hash) { -+ case UBIFS_KEY_HASH_R5: -+ c->key_hash = key_r5_hash; -+ c->key_hash_type = UBIFS_KEY_HASH_R5; -+ break; -+ -+ case UBIFS_KEY_HASH_TEST: -+ c->key_hash = key_test_hash; -+ c->key_hash_type = UBIFS_KEY_HASH_TEST; -+ break; -+ }; -+ -+ c->key_fmt = sup->key_fmt; -+ -+ switch (c->key_fmt) { -+ case UBIFS_SIMPLE_KEY_FMT: -+ c->key_len = UBIFS_SK_LEN; -+ break; -+ default: -+ ubifs_err("unsupported key format"); -+ err = -EINVAL; -+ goto out; -+ } -+ -+ c->leb_cnt = le32_to_cpu(sup->leb_cnt); -+ c->max_leb_cnt = le32_to_cpu(sup->max_leb_cnt); -+ c->max_bud_bytes = le64_to_cpu(sup->max_bud_bytes); -+ c->log_lebs = le32_to_cpu(sup->log_lebs); -+ c->lpt_lebs = le32_to_cpu(sup->lpt_lebs); -+ c->orph_lebs = le32_to_cpu(sup->orph_lebs); -+ c->jhead_cnt = le32_to_cpu(sup->jhead_cnt) + NONDATA_JHEADS_CNT; -+ c->fanout = le32_to_cpu(sup->fanout); -+ c->lsave_cnt = le32_to_cpu(sup->lsave_cnt); -+ c->rp_size = le64_to_cpu(sup->rp_size); -+ c->rp_uid = le32_to_cpu(sup->rp_uid); -+ c->rp_gid = le32_to_cpu(sup->rp_gid); -+ sup_flags = le32_to_cpu(sup->flags); -+ if (!c->mount_opts.override_compr) -+ c->default_compr = le16_to_cpu(sup->default_compr); -+ -+ c->vfs_sb->s_time_gran = le32_to_cpu(sup->time_gran); -+ memcpy(&c->uuid, &sup->uuid, 16); -+ c->big_lpt = !!(sup_flags & UBIFS_FLG_BIGLPT); -+ -+ /* Automatically increase file system size to the maximum size */ -+ c->old_leb_cnt = c->leb_cnt; -+ if (c->leb_cnt < c->vi.size && c->leb_cnt < c->max_leb_cnt) { -+ c->leb_cnt = min_t(int, c->max_leb_cnt, c->vi.size); -+ if 
(c->vfs_sb->s_flags & MS_RDONLY) -+ dbg_mnt("Auto resizing (ro) from %d LEBs to %d LEBs", -+ c->old_leb_cnt, c->leb_cnt); -+ else { -+ dbg_mnt("Auto resizing (sb) from %d LEBs to %d LEBs", -+ c->old_leb_cnt, c->leb_cnt); -+ sup->leb_cnt = cpu_to_le32(c->leb_cnt); -+ err = ubifs_write_sb_node(c, sup); -+ if (err) -+ goto out; -+ c->old_leb_cnt = c->leb_cnt; -+ } -+ } -+ -+ c->log_bytes = (long long)c->log_lebs * c->leb_size; -+ c->log_last = UBIFS_LOG_LNUM + c->log_lebs - 1; -+ c->lpt_first = UBIFS_LOG_LNUM + c->log_lebs; -+ c->lpt_last = c->lpt_first + c->lpt_lebs - 1; -+ c->orph_first = c->lpt_last + 1; -+ c->orph_last = c->orph_first + c->orph_lebs - 1; -+ c->main_lebs = c->leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS; -+ c->main_lebs -= c->log_lebs + c->lpt_lebs + c->orph_lebs; -+ c->main_first = c->leb_cnt - c->main_lebs; -+ c->report_rp_size = ubifs_reported_space(c, c->rp_size); -+ -+ err = validate_sb(c, sup); -+out: -+ kfree(sup); -+ return err; -+} -diff -Nurd linux-2.6.24.orig/fs/ubifs/scan.c linux-2.6.24/fs/ubifs/scan.c ---- linux-2.6.24.orig/fs/ubifs/scan.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.24/fs/ubifs/scan.c 2009-04-17 09:49:28.000000000 +0200 -@@ -0,0 +1,362 @@ -+/* -+ * This file is part of UBIFS. -+ * -+ * Copyright (C) 2006-2008 Nokia Corporation -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 as published by -+ * the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -+ * more details. 
-+ * -+ * You should have received a copy of the GNU General Public License along with -+ * this program; if not, write to the Free Software Foundation, Inc., 51 -+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ * -+ * Authors: Adrian Hunter -+ * Artem Bityutskiy (Битюцкий Артём) -+ */ -+ -+/* -+ * This file implements the scan which is a general-purpose function for -+ * determining what nodes are in an eraseblock. The scan is used to replay the -+ * journal, to do garbage collection. for the TNC in-the-gaps method, and by -+ * debugging functions. -+ */ -+ -+#include "ubifs.h" -+ -+/** -+ * scan_padding_bytes - scan for padding bytes. -+ * @buf: buffer to scan -+ * @len: length of buffer -+ * -+ * This function returns the number of padding bytes on success and -+ * %SCANNED_GARBAGE on failure. -+ */ -+static int scan_padding_bytes(void *buf, int len) -+{ -+ int pad_len = 0, max_pad_len = min_t(int, UBIFS_PAD_NODE_SZ, len); -+ uint8_t *p = buf; -+ -+ dbg_scan("not a node"); -+ -+ while (pad_len < max_pad_len && *p++ == UBIFS_PADDING_BYTE) -+ pad_len += 1; -+ -+ if (!pad_len || (pad_len & 7)) -+ return SCANNED_GARBAGE; -+ -+ dbg_scan("%d padding bytes", pad_len); -+ -+ return pad_len; -+} -+ -+/** -+ * ubifs_scan_a_node - scan for a node or padding. -+ * @c: UBIFS file-system description object -+ * @buf: buffer to scan -+ * @len: length of buffer -+ * @lnum: logical eraseblock number -+ * @offs: offset within the logical eraseblock -+ * @quiet: print no messages -+ * -+ * This function returns a scanning code to indicate what was scanned. 
-+ */ -+int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, -+ int offs, int quiet) -+{ -+ struct ubifs_ch *ch = buf; -+ uint32_t magic; -+ -+ magic = le32_to_cpu(ch->magic); -+ -+ if (magic == 0xFFFFFFFF) { -+ dbg_scan("hit empty space"); -+ return SCANNED_EMPTY_SPACE; -+ } -+ -+ if (magic != UBIFS_NODE_MAGIC) -+ return scan_padding_bytes(buf, len); -+ -+ if (len < UBIFS_CH_SZ) -+ return SCANNED_GARBAGE; -+ -+ dbg_scan("scanning %s", dbg_ntype(ch->node_type)); -+ -+ if (ubifs_check_node(c, buf, lnum, offs, quiet, 1)) -+ return SCANNED_A_CORRUPT_NODE; -+ -+ if (ch->node_type == UBIFS_PAD_NODE) { -+ struct ubifs_pad_node *pad = buf; -+ int pad_len = le32_to_cpu(pad->pad_len); -+ int node_len = le32_to_cpu(ch->len); -+ -+ /* Validate the padding node */ -+ if (pad_len < 0 || -+ offs + node_len + pad_len > c->leb_size) { -+ if (!quiet) { -+ ubifs_err("bad pad node at LEB %d:%d", -+ lnum, offs); -+ dbg_dump_node(c, pad); -+ } -+ return SCANNED_A_BAD_PAD_NODE; -+ } -+ -+ /* Make the node pads to 8-byte boundary */ -+ if ((node_len + pad_len) & 7) { -+ if (!quiet) { -+ dbg_err("bad padding length %d - %d", -+ offs, offs + node_len + pad_len); -+ } -+ return SCANNED_A_BAD_PAD_NODE; -+ } -+ -+ dbg_scan("%d bytes padded, offset now %d", -+ pad_len, ALIGN(offs + node_len + pad_len, 8)); -+ -+ return node_len + pad_len; -+ } -+ -+ return SCANNED_A_NODE; -+} -+ -+/** -+ * ubifs_start_scan - create LEB scanning information at start of scan. -+ * @c: UBIFS file-system description object -+ * @lnum: logical eraseblock number -+ * @offs: offset to start at (usually zero) -+ * @sbuf: scan buffer (must be c->leb_size) -+ * -+ * This function returns %0 on success and a negative error code on failure. 
-+ */ -+struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum, -+ int offs, void *sbuf) -+{ -+ struct ubifs_scan_leb *sleb; -+ int err; -+ -+ dbg_scan("scan LEB %d:%d", lnum, offs); -+ -+ sleb = kzalloc(sizeof(struct ubifs_scan_leb), GFP_NOFS); -+ if (!sleb) -+ return ERR_PTR(-ENOMEM); -+ -+ sleb->lnum = lnum; -+ INIT_LIST_HEAD(&sleb->nodes); -+ sleb->buf = sbuf; -+ -+ err = ubi_read(c->ubi, lnum, sbuf + offs, offs, c->leb_size - offs); -+ if (err && err != -EBADMSG) { -+ ubifs_err("cannot read %d bytes from LEB %d:%d," -+ " error %d", c->leb_size - offs, lnum, offs, err); -+ kfree(sleb); -+ return ERR_PTR(err); -+ } -+ -+ if (err == -EBADMSG) -+ sleb->ecc = 1; -+ -+ return sleb; -+} -+ -+/** -+ * ubifs_end_scan - update LEB scanning information at end of scan. -+ * @c: UBIFS file-system description object -+ * @sleb: scanning information -+ * @lnum: logical eraseblock number -+ * @offs: offset to start at (usually zero) -+ * -+ * This function returns %0 on success and a negative error code on failure. -+ */ -+void ubifs_end_scan(const struct ubifs_info *c, struct ubifs_scan_leb *sleb, -+ int lnum, int offs) -+{ -+ lnum = lnum; -+ dbg_scan("stop scanning LEB %d at offset %d", lnum, offs); -+ ubifs_assert(offs % c->min_io_size == 0); -+ -+ sleb->endpt = ALIGN(offs, c->min_io_size); -+} -+ -+/** -+ * ubifs_add_snod - add a scanned node to LEB scanning information. -+ * @c: UBIFS file-system description object -+ * @sleb: scanning information -+ * @buf: buffer containing node -+ * @offs: offset of node on flash -+ * -+ * This function returns %0 on success and a negative error code on failure. 
-+ */ -+int ubifs_add_snod(const struct ubifs_info *c, struct ubifs_scan_leb *sleb, -+ void *buf, int offs) -+{ -+ struct ubifs_ch *ch = buf; -+ struct ubifs_ino_node *ino = buf; -+ struct ubifs_scan_node *snod; -+ -+ snod = kzalloc(sizeof(struct ubifs_scan_node), GFP_NOFS); -+ if (!snod) -+ return -ENOMEM; -+ -+ snod->sqnum = le64_to_cpu(ch->sqnum); -+ snod->type = ch->node_type; -+ snod->offs = offs; -+ snod->len = le32_to_cpu(ch->len); -+ snod->node = buf; -+ -+ switch (ch->node_type) { -+ case UBIFS_INO_NODE: -+ case UBIFS_DENT_NODE: -+ case UBIFS_XENT_NODE: -+ case UBIFS_DATA_NODE: -+ case UBIFS_TRUN_NODE: -+ /* -+ * The key is in the same place in all keyed -+ * nodes. -+ */ -+ key_read(c, &ino->key, &snod->key); -+ break; -+ } -+ list_add_tail(&snod->list, &sleb->nodes); -+ sleb->nodes_cnt += 1; -+ return 0; -+} -+ -+/** -+ * ubifs_scanned_corruption - print information after UBIFS scanned corruption. -+ * @c: UBIFS file-system description object -+ * @lnum: LEB number of corruption -+ * @offs: offset of corruption -+ * @buf: buffer containing corruption -+ */ -+void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs, -+ void *buf) -+{ -+ int len; -+ -+ ubifs_err("corrupted data at LEB %d:%d", lnum, offs); -+ if (dbg_failure_mode) -+ return; -+ len = c->leb_size - offs; -+ if (len > 4096) -+ len = 4096; -+ dbg_err("first %d bytes from LEB %d:%d", len, lnum, offs); -+ print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 4, buf, len, 1); -+} -+ -+/** -+ * ubifs_scan - scan a logical eraseblock. -+ * @c: UBIFS file-system description object -+ * @lnum: logical eraseblock number -+ * @offs: offset to start at (usually zero) -+ * @sbuf: scan buffer (must be c->leb_size) -+ * -+ * This function scans LEB number @lnum and returns complete information about -+ * its contents. Returns an error code in case of failure. 
-+ */ -+struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, -+ int offs, void *sbuf) -+{ -+ void *buf = sbuf + offs; -+ int err, len = c->leb_size - offs; -+ struct ubifs_scan_leb *sleb; -+ -+ sleb = ubifs_start_scan(c, lnum, offs, sbuf); -+ if (IS_ERR(sleb)) -+ return sleb; -+ -+ while (len >= 8) { -+ struct ubifs_ch *ch = buf; -+ int node_len, ret; -+ -+ dbg_scan("look at LEB %d:%d (%d bytes left)", -+ lnum, offs, len); -+ -+ cond_resched(); -+ -+ ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0); -+ -+ if (ret > 0) { -+ /* Padding bytes or a valid padding node */ -+ offs += ret; -+ buf += ret; -+ len -= ret; -+ continue; -+ } -+ -+ if (ret == SCANNED_EMPTY_SPACE) -+ /* Empty space is checked later */ -+ break; -+ -+ switch (ret) { -+ case SCANNED_GARBAGE: -+ dbg_err("garbage"); -+ goto corrupted; -+ case SCANNED_A_NODE: -+ break; -+ case SCANNED_A_CORRUPT_NODE: -+ case SCANNED_A_BAD_PAD_NODE: -+ dbg_err("bad node"); -+ goto corrupted; -+ default: -+ dbg_err("unknown"); -+ goto corrupted; -+ } -+ -+ err = ubifs_add_snod(c, sleb, buf, offs); -+ if (err) -+ goto error; -+ -+ node_len = ALIGN(le32_to_cpu(ch->len), 8); -+ offs += node_len; -+ buf += node_len; -+ len -= node_len; -+ } -+ -+ if (offs % c->min_io_size) -+ goto corrupted; -+ -+ ubifs_end_scan(c, sleb, lnum, offs); -+ -+ for (; len > 4; offs += 4, buf = buf + 4, len -= 4) -+ if (*(uint32_t *)buf != 0xffffffff) -+ break; -+ for (; len; offs++, buf++, len--) -+ if (*(uint8_t *)buf != 0xff) { -+ ubifs_err("corrupt empty space at LEB %d:%d", -+ lnum, offs); -+ goto corrupted; -+ } -+ -+ return sleb; -+ -+corrupted: -+ ubifs_scanned_corruption(c, lnum, offs, buf); -+ err = -EUCLEAN; -+error: -+ ubifs_err("LEB %d scanning failed", lnum); -+ ubifs_scan_destroy(sleb); -+ return ERR_PTR(err); -+} -+ -+/** -+ * ubifs_scan_destroy - destroy LEB scanning information. 
-+ * @sleb: scanning information to free -+ */ -+void ubifs_scan_destroy(struct ubifs_scan_leb *sleb) -+{ -+ struct ubifs_scan_node *node; -+ struct list_head *head; -+ -+ head = &sleb->nodes; -+ while (!list_empty(head)) { -+ node = list_entry(head->next, struct ubifs_scan_node, list); -+ list_del(&node->list); -+ kfree(node); -+ } -+ kfree(sleb); -+} -diff -Nurd linux-2.6.24.orig/fs/ubifs/shrinker.c linux-2.6.24/fs/ubifs/shrinker.c ---- linux-2.6.24.orig/fs/ubifs/shrinker.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.24/fs/ubifs/shrinker.c 2009-04-17 09:49:28.000000000 +0200 -@@ -0,0 +1,322 @@ -+/* -+ * This file is part of UBIFS. -+ * -+ * Copyright (C) 2006-2008 Nokia Corporation. -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 as published by -+ * the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -+ * more details. -+ * -+ * You should have received a copy of the GNU General Public License along with -+ * this program; if not, write to the Free Software Foundation, Inc., 51 -+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ * -+ * Authors: Artem Bityutskiy (Битюцкий Артём) -+ * Adrian Hunter -+ */ -+ -+/* -+ * This file implements UBIFS shrinker which evicts clean znodes from the TNC -+ * tree when Linux VM needs more RAM. -+ * -+ * We do not implement any LRU lists to find oldest znodes to free because it -+ * would add additional overhead to the file system fast paths. So the shrinker -+ * just walks the TNC tree when searching for znodes to free. -+ * -+ * If the root of a TNC sub-tree is clean and old enough, then the children are -+ * also clean and old enough. 
So the shrinker walks the TNC in level order and -+ * dumps entire sub-trees. -+ * -+ * The age of znodes is just the time-stamp when they were last looked at. -+ * The current shrinker first tries to evict old znodes, then young ones. -+ * -+ * Since the shrinker is global, it has to protect against races with FS -+ * un-mounts, which is done by the 'ubifs_infos_lock' and 'c->umount_mutex'. -+ */ -+ -+#include "ubifs.h" -+ -+/* List of all UBIFS file-system instances */ -+LIST_HEAD(ubifs_infos); -+ -+/* -+ * We number each shrinker run and record the number on the ubifs_info structure -+ * so that we can easily work out which ubifs_info structures have already been -+ * done by the current run. -+ */ -+static unsigned int shrinker_run_no; -+ -+/* Protects 'ubifs_infos' list */ -+DEFINE_SPINLOCK(ubifs_infos_lock); -+ -+/* Global clean znode counter (for all mounted UBIFS instances) */ -+atomic_long_t ubifs_clean_zn_cnt; -+ -+/** -+ * shrink_tnc - shrink TNC tree. -+ * @c: UBIFS file-system description object -+ * @nr: number of znodes to free -+ * @age: the age of znodes to free -+ * @contention: if any contention, this is set to %1 -+ * -+ * This function traverses TNC tree and frees clean znodes. It does not free -+ * clean znodes which younger then @age. Returns number of freed znodes. -+ */ -+static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention) -+{ -+ int total_freed = 0; -+ struct ubifs_znode *znode, *zprev; -+ int time = get_seconds(); -+ -+ ubifs_assert(mutex_is_locked(&c->umount_mutex)); -+ ubifs_assert(mutex_is_locked(&c->tnc_mutex)); -+ -+ if (!c->zroot.znode || atomic_long_read(&c->clean_zn_cnt) == 0) -+ return 0; -+ -+ /* -+ * Traverse the TNC tree in levelorder manner, so that it is possible -+ * to destroy large sub-trees. Indeed, if a znode is old, then all its -+ * children are older or of the same age. 
-+ * -+ * Note, we are holding 'c->tnc_mutex', so we do not have to lock the -+ * 'c->space_lock' when _reading_ 'c->clean_zn_cnt', because it is -+ * changed only when the 'c->tnc_mutex' is held. -+ */ -+ zprev = NULL; -+ znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL); -+ while (znode && total_freed < nr && -+ atomic_long_read(&c->clean_zn_cnt) > 0) { -+ int freed; -+ -+ /* -+ * If the znode is clean, but it is in the 'c->cnext' list, this -+ * means that this znode has just been written to flash as a -+ * part of commit and was marked clean. They will be removed -+ * from the list at end commit. We cannot change the list, -+ * because it is not protected by any mutex (design decision to -+ * make commit really independent and parallel to main I/O). So -+ * we just skip these znodes. -+ * -+ * Note, the 'clean_zn_cnt' counters are not updated until -+ * after the commit, so the UBIFS shrinker does not report -+ * the znodes which are in the 'c->cnext' list as freeable. -+ * -+ * Also note, if the root of a sub-tree is not in 'c->cnext', -+ * then the whole sub-tree is not in 'c->cnext' as well, so it -+ * is safe to dump whole sub-tree. -+ */ -+ -+ if (znode->cnext) { -+ /* -+ * Very soon these znodes will be removed from the list -+ * and become freeable. -+ */ -+ *contention = 1; -+ } else if (!ubifs_zn_dirty(znode) && -+ abs(time - znode->time) >= age) { -+ if (znode->parent) -+ znode->parent->zbranch[znode->iip].znode = NULL; -+ else -+ c->zroot.znode = NULL; -+ -+ freed = ubifs_destroy_tnc_subtree(znode); -+ atomic_long_sub(freed, &ubifs_clean_zn_cnt); -+ atomic_long_sub(freed, &c->clean_zn_cnt); -+ ubifs_assert(atomic_long_read(&c->clean_zn_cnt) >= 0); -+ total_freed += freed; -+ znode = zprev; -+ } -+ -+ if (unlikely(!c->zroot.znode)) -+ break; -+ -+ zprev = znode; -+ znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode); -+ cond_resched(); -+ } -+ -+ return total_freed; -+} -+ -+/** -+ * shrink_tnc_trees - shrink UBIFS TNC trees. 
-+ * @nr: number of znodes to free -+ * @age: the age of znodes to free -+ * @contention: if any contention, this is set to %1 -+ * -+ * This function walks the list of mounted UBIFS file-systems and frees clean -+ * znodes which are older then @age, until at least @nr znodes are freed. -+ * Returns the number of freed znodes. -+ */ -+static int shrink_tnc_trees(int nr, int age, int *contention) -+{ -+ struct ubifs_info *c; -+ struct list_head *p; -+ unsigned int run_no; -+ int freed = 0; -+ -+ spin_lock(&ubifs_infos_lock); -+ do { -+ run_no = ++shrinker_run_no; -+ } while (run_no == 0); -+ /* Iterate over all mounted UBIFS file-systems and try to shrink them */ -+ p = ubifs_infos.next; -+ while (p != &ubifs_infos) { -+ c = list_entry(p, struct ubifs_info, infos_list); -+ /* -+ * We move the ones we do to the end of the list, so we stop -+ * when we see one we have already done. -+ */ -+ if (c->shrinker_run_no == run_no) -+ break; -+ if (!mutex_trylock(&c->umount_mutex)) { -+ /* Some un-mount is in progress, try next FS */ -+ *contention = 1; -+ p = p->next; -+ continue; -+ } -+ /* -+ * We're holding 'c->umount_mutex', so the file-system won't go -+ * away. -+ */ -+ if (!mutex_trylock(&c->tnc_mutex)) { -+ mutex_unlock(&c->umount_mutex); -+ *contention = 1; -+ p = p->next; -+ continue; -+ } -+ spin_unlock(&ubifs_infos_lock); -+ /* -+ * OK, now we have TNC locked, the file-system cannot go away - -+ * it is safe to reap the cache. -+ */ -+ c->shrinker_run_no = run_no; -+ freed += shrink_tnc(c, nr, age, contention); -+ mutex_unlock(&c->tnc_mutex); -+ spin_lock(&ubifs_infos_lock); -+ /* Get the next list element before we move this one */ -+ p = p->next; -+ /* -+ * Move this one to the end of the list to provide some -+ * fairness. 
-+ */ -+ list_del(&c->infos_list); -+ list_add_tail(&c->infos_list, &ubifs_infos); -+ mutex_unlock(&c->umount_mutex); -+ if (freed >= nr) -+ break; -+ } -+ spin_unlock(&ubifs_infos_lock); -+ return freed; -+} -+ -+/** -+ * kick_a_thread - kick a background thread to start commit. -+ * -+ * This function kicks a background thread to start background commit. Returns -+ * %-1 if a thread was kicked or there is another reason to assume the memory -+ * will soon be freed or become freeable. If there are no dirty znodes, returns -+ * %0. -+ */ -+static int kick_a_thread(void) -+{ -+ int i; -+ struct ubifs_info *c; -+ -+ /* -+ * Iterate over all mounted UBIFS file-systems and find out if there is -+ * already an ongoing commit operation there. If no, then iterate for -+ * the second time and initiate background commit. -+ */ -+ spin_lock(&ubifs_infos_lock); -+ for (i = 0; i < 2; i++) { -+ list_for_each_entry(c, &ubifs_infos, infos_list) { -+ long dirty_zn_cnt; -+ -+ if (!mutex_trylock(&c->umount_mutex)) { -+ /* -+ * Some un-mount is in progress, it will -+ * certainly free memory, so just return. 
-+ */ -+ spin_unlock(&ubifs_infos_lock); -+ return -1; -+ } -+ -+ dirty_zn_cnt = atomic_long_read(&c->dirty_zn_cnt); -+ -+ if (!dirty_zn_cnt || c->cmt_state == COMMIT_BROKEN || -+ c->ro_media) { -+ mutex_unlock(&c->umount_mutex); -+ continue; -+ } -+ -+ if (c->cmt_state != COMMIT_RESTING) { -+ spin_unlock(&ubifs_infos_lock); -+ mutex_unlock(&c->umount_mutex); -+ return -1; -+ } -+ -+ if (i == 1) { -+ list_del(&c->infos_list); -+ list_add_tail(&c->infos_list, &ubifs_infos); -+ spin_unlock(&ubifs_infos_lock); -+ -+ ubifs_request_bg_commit(c); -+ mutex_unlock(&c->umount_mutex); -+ return -1; -+ } -+ mutex_unlock(&c->umount_mutex); -+ } -+ } -+ spin_unlock(&ubifs_infos_lock); -+ -+ return 0; -+} -+ -+int ubifs_shrinker(int nr, gfp_t gfp_mask) -+{ -+ int freed, contention = 0; -+ long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); -+ -+ if (nr == 0) -+ return clean_zn_cnt; -+ -+ if (!clean_zn_cnt) { -+ /* -+ * No clean znodes, nothing to reap. All we can do in this case -+ * is to kick background threads to start commit, which will -+ * probably make clean znodes which, in turn, will be freeable. -+ * And we return -1 which means will make VM call us again -+ * later. 
-+ */ -+ dbg_tnc("no clean znodes, kick a thread"); -+ return kick_a_thread(); -+ } -+ -+ freed = shrink_tnc_trees(nr, OLD_ZNODE_AGE, &contention); -+ if (freed >= nr) -+ goto out; -+ -+ dbg_tnc("not enough old znodes, try to free young ones"); -+ freed += shrink_tnc_trees(nr - freed, YOUNG_ZNODE_AGE, &contention); -+ if (freed >= nr) -+ goto out; -+ -+ dbg_tnc("not enough young znodes, free all"); -+ freed += shrink_tnc_trees(nr - freed, 0, &contention); -+ -+ if (!freed && contention) { -+ dbg_tnc("freed nothing, but contention"); -+ return -1; -+ } -+ -+out: -+ dbg_tnc("%d znodes were freed, requested %d", freed, nr); -+ return freed; -+} -diff -Nurd linux-2.6.24.orig/fs/ubifs/super.c linux-2.6.24/fs/ubifs/super.c ---- linux-2.6.24.orig/fs/ubifs/super.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.24/fs/ubifs/super.c 2009-04-17 09:49:28.000000000 +0200 -@@ -0,0 +1,2192 @@ -+/* -+ * This file is part of UBIFS. -+ * -+ * Copyright (C) 2006-2008 Nokia Corporation. -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 as published by -+ * the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -+ * more details. -+ * -+ * You should have received a copy of the GNU General Public License along with -+ * this program; if not, write to the Free Software Foundation, Inc., 51 -+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ * -+ * Authors: Artem Bityutskiy (Битюцкий Артём) -+ * Adrian Hunter -+ */ -+ -+/* -+ * This file implements UBIFS initialization and VFS superblock operations. Some -+ * initialization stuff which is rather large and complex is placed at -+ * corresponding subsystems, but most of it is here. 
-+ */ -+ -+#include <linux/init.h> -+#include <linux/slab.h> -+#include <linux/module.h> -+#include <linux/ctype.h> -+#include <linux/kthread.h> -+#include <linux/parser.h> -+#include <linux/seq_file.h> -+#include <linux/mount.h> -+#include <linux/writeback.h> -+#include "ubifs.h" -+ -+/* -+ * Maximum amount of memory we may 'kmalloc()' without worrying that we are -+ * allocating too much. -+ */ -+#define UBIFS_KMALLOC_OK (128*1024) -+ -+/* Slab cache for UBIFS inodes */ -+struct kmem_cache *ubifs_inode_slab; -+ -+#ifndef UBIFS_COMPAT_NO_SHRINKER -+/* UBIFS TNC shrinker description */ -+static struct shrinker ubifs_shrinker_info = { -+ .shrink = ubifs_shrinker, -+ .seeks = DEFAULT_SEEKS, -+}; -+#endif -+ -+/** -+ * validate_inode - validate inode. -+ * @c: UBIFS file-system description object -+ * @inode: the inode to validate -+ * -+ * This is a helper function for 'ubifs_iget()' which validates various fields -+ * of a newly built inode to make sure they contain sane values and prevent -+ * possible vulnerabilities. Returns zero if the inode is all right and -+ * a non-zero error code if not. 
-+ */ -+static int validate_inode(struct ubifs_info *c, const struct inode *inode) -+{ -+ int err; -+ const struct ubifs_inode *ui = ubifs_inode(inode); -+ -+ if (inode->i_size > c->max_inode_sz) { -+ ubifs_err("inode is too large (%lld)", -+ (long long)inode->i_size); -+ return 1; -+ } -+ -+ if (ui->compr_type < 0 || ui->compr_type >= UBIFS_COMPR_TYPES_CNT) { -+ ubifs_err("unknown compression type %d", ui->compr_type); -+ return 2; -+ } -+ -+ if (ui->xattr_names + ui->xattr_cnt > XATTR_LIST_MAX) -+ return 3; -+ -+ if (ui->data_len < 0 || ui->data_len > UBIFS_MAX_INO_DATA) -+ return 4; -+ -+ if (ui->xattr && (inode->i_mode & S_IFMT) != S_IFREG) -+ return 5; -+ -+ if (!ubifs_compr_present(ui->compr_type)) { -+ ubifs_warn("inode %lu uses '%s' compression, but it was not " -+ "compiled in", inode->i_ino, -+ ubifs_compr_name(ui->compr_type)); -+ } -+ -+ err = dbg_check_dir_size(c, inode); -+ return err; -+} -+ -+struct inode *ubifs_iget(struct super_block *sb, unsigned long inum) -+{ -+ struct inode *inode; -+ -+ inode = iget(sb, inum); -+ if (!inode) { -+ make_bad_inode(inode); -+ return ERR_PTR(-EINVAL); -+ } -+ -+ return inode; -+} -+ -+void ubifs_read_inode(struct inode *inode) -+{ -+ int err; -+ union ubifs_key key; -+ struct ubifs_ino_node *ino; -+ struct ubifs_info *c = inode->i_sb->s_fs_info; -+ struct ubifs_inode *ui = ubifs_inode(inode); -+ -+ dbg_gen("inode %lu", inode->i_ino); -+ ubifs_assert(inode->i_state & I_LOCK); -+ -+ ino = kmalloc(UBIFS_MAX_INO_NODE_SZ, GFP_NOFS); -+ if (!ino) { -+ err = -ENOMEM; -+ goto out; -+ } -+ -+ ino_key_init(c, &key, inode->i_ino); -+ -+ err = ubifs_tnc_lookup(c, &key, ino); -+ if (err) -+ goto out_ino; -+ -+ inode->i_flags |= (S_NOCMTIME | S_NOATIME); -+ inode->i_nlink = le32_to_cpu(ino->nlink); -+ inode->i_uid = le32_to_cpu(ino->uid); -+ inode->i_gid = le32_to_cpu(ino->gid); -+ inode->i_atime.tv_sec = (int64_t)le64_to_cpu(ino->atime_sec); -+ inode->i_atime.tv_nsec = le32_to_cpu(ino->atime_nsec); -+ inode->i_mtime.tv_sec = 
(int64_t)le64_to_cpu(ino->mtime_sec); -+ inode->i_mtime.tv_nsec = le32_to_cpu(ino->mtime_nsec); -+ inode->i_ctime.tv_sec = (int64_t)le64_to_cpu(ino->ctime_sec); -+ inode->i_ctime.tv_nsec = le32_to_cpu(ino->ctime_nsec); -+ inode->i_mode = le32_to_cpu(ino->mode); -+ inode->i_size = le64_to_cpu(ino->size); -+ -+ ui->data_len = le32_to_cpu(ino->data_len); -+ ui->flags = le32_to_cpu(ino->flags); -+ ui->compr_type = le16_to_cpu(ino->compr_type); -+ ui->creat_sqnum = le64_to_cpu(ino->creat_sqnum); -+ ui->xattr_cnt = le32_to_cpu(ino->xattr_cnt); -+ ui->xattr_size = le32_to_cpu(ino->xattr_size); -+ ui->xattr_names = le32_to_cpu(ino->xattr_names); -+ ui->synced_i_size = ui->ui_size = inode->i_size; -+ -+ ui->xattr = (ui->flags & UBIFS_XATTR_FL) ? 1 : 0; -+ -+ err = validate_inode(c, inode); -+ if (err) -+ goto out_invalid; -+ -+ /* Disable read-ahead */ -+ inode->i_mapping->backing_dev_info = &c->bdi; -+ -+ switch (inode->i_mode & S_IFMT) { -+ case S_IFREG: -+ inode->i_mapping->a_ops = &ubifs_file_address_operations; -+ inode->i_op = &ubifs_file_inode_operations; -+ inode->i_fop = &ubifs_file_operations; -+ if (ui->xattr) { -+ ui->data = kmalloc(ui->data_len + 1, GFP_NOFS); -+ if (!ui->data) { -+ err = -ENOMEM; -+ goto out_ino; -+ } -+ memcpy(ui->data, ino->data, ui->data_len); -+ ((char *)ui->data)[ui->data_len] = '\0'; -+ } else if (ui->data_len != 0) { -+ err = 10; -+ goto out_invalid; -+ } -+ break; -+ case S_IFDIR: -+ inode->i_op = &ubifs_dir_inode_operations; -+ inode->i_fop = &ubifs_dir_operations; -+ if (ui->data_len != 0) { -+ err = 11; -+ goto out_invalid; -+ } -+ break; -+ case S_IFLNK: -+ inode->i_op = &ubifs_symlink_inode_operations; -+ if (ui->data_len <= 0 || ui->data_len > UBIFS_MAX_INO_DATA) { -+ err = 12; -+ goto out_invalid; -+ } -+ ui->data = kmalloc(ui->data_len + 1, GFP_NOFS); -+ if (!ui->data) { -+ err = -ENOMEM; -+ goto out_ino; -+ } -+ memcpy(ui->data, ino->data, ui->data_len); -+ ((char *)ui->data)[ui->data_len] = '\0'; -+ break; -+ case S_IFBLK: -+ 
case S_IFCHR: -+ { -+ dev_t rdev; -+ union ubifs_dev_desc *dev; -+ -+ ui->data = kmalloc(sizeof(union ubifs_dev_desc), GFP_NOFS); -+ if (!ui->data) { -+ err = -ENOMEM; -+ goto out_ino; -+ } -+ -+ dev = (union ubifs_dev_desc *)ino->data; -+ if (ui->data_len == sizeof(dev->new)) -+ rdev = new_decode_dev(le32_to_cpu(dev->new)); -+ else if (ui->data_len == sizeof(dev->huge)) -+ rdev = huge_decode_dev(le64_to_cpu(dev->huge)); -+ else { -+ err = 13; -+ goto out_invalid; -+ } -+ memcpy(ui->data, ino->data, ui->data_len); -+ inode->i_op = &ubifs_file_inode_operations; -+ init_special_inode(inode, inode->i_mode, rdev); -+ break; -+ } -+ case S_IFSOCK: -+ case S_IFIFO: -+ inode->i_op = &ubifs_file_inode_operations; -+ init_special_inode(inode, inode->i_mode, 0); -+ if (ui->data_len != 0) { -+ err = 14; -+ goto out_invalid; -+ } -+ break; -+ default: -+ err = 15; -+ goto out_invalid; -+ } -+ -+ ubifs_set_inode_flags(inode); -+ kfree(ino); -+ return; -+ -+out_invalid: -+ ubifs_err("inode %lu validation failed, error %d", inode->i_ino, err); -+ dbg_dump_inode(c, inode); -+ dbg_dump_node(c, ino); -+ err = -EINVAL; -+out_ino: -+ kfree(ino); -+out: -+ ubifs_err("failed to read inode %lu, error %d", inode->i_ino, err); -+ make_bad_inode(inode); -+ return; -+} -+ -+static struct inode *ubifs_alloc_inode(struct super_block *sb) -+{ -+ struct ubifs_inode *ui; -+ -+ ui = kmem_cache_alloc(ubifs_inode_slab, GFP_NOFS); -+ if (!ui) -+ return NULL; -+ -+ memset((void *)ui + sizeof(struct inode), 0, -+ sizeof(struct ubifs_inode) - sizeof(struct inode)); -+ mutex_init(&ui->ui_mutex); -+ spin_lock_init(&ui->ui_lock); -+ return &ui->vfs_inode; -+}; -+ -+static void ubifs_destroy_inode(struct inode *inode) -+{ -+ struct ubifs_inode *ui = ubifs_inode(inode); -+ -+ kfree(ui->data); -+ kmem_cache_free(ubifs_inode_slab, inode); -+} -+ -+/* -+ * Note, Linux write-back code calls this without 'i_mutex'. 
-+ */ -+static int ubifs_write_inode(struct inode *inode, int wait) -+{ -+ int err = 0; -+ struct ubifs_info *c = inode->i_sb->s_fs_info; -+ struct ubifs_inode *ui = ubifs_inode(inode); -+ -+ ubifs_assert(!ui->xattr); -+ if (is_bad_inode(inode)) -+ return 0; -+ -+ mutex_lock(&ui->ui_mutex); -+ /* -+ * Due to races between write-back forced by budgeting -+ * (see 'sync_some_inodes()') and pdflush write-back, the inode may -+ * have already been synchronized, do not do this again. This might -+ * also happen if it was synchronized in an VFS operation, e.g. -+ * 'ubifs_link()'. -+ */ -+ if (!ui->dirty) { -+ mutex_unlock(&ui->ui_mutex); -+ return 0; -+ } -+ -+ /* -+ * As an optimization, do not write orphan inodes to the media just -+ * because this is not needed. -+ */ -+ dbg_gen("inode %lu, mode %#x, nlink %u", -+ inode->i_ino, (int)inode->i_mode, inode->i_nlink); -+ if (inode->i_nlink) { -+ err = ubifs_jnl_write_inode(c, inode); -+ if (err) -+ ubifs_err("can't write inode %lu, error %d", -+ inode->i_ino, err); -+ } -+ -+ ui->dirty = 0; -+ mutex_unlock(&ui->ui_mutex); -+ ubifs_release_dirty_inode_budget(c, ui); -+ return err; -+} -+ -+static void ubifs_delete_inode(struct inode *inode) -+{ -+ int err; -+ struct ubifs_info *c = inode->i_sb->s_fs_info; -+ struct ubifs_inode *ui = ubifs_inode(inode); -+ -+ if (ui->xattr) -+ /* -+ * Extended attribute inode deletions are fully handled in -+ * 'ubifs_removexattr()'. These inodes are special and have -+ * limited usage, so there is nothing to do here. -+ */ -+ goto out; -+ -+ dbg_gen("inode %lu, mode %#x", inode->i_ino, (int)inode->i_mode); -+ ubifs_assert(!atomic_read(&inode->i_count)); -+ ubifs_assert(inode->i_nlink == 0); -+ -+ truncate_inode_pages(&inode->i_data, 0); -+ if (is_bad_inode(inode)) -+ goto out; -+ -+ ui->ui_size = inode->i_size = 0; -+ err = ubifs_jnl_delete_inode(c, inode); -+ if (err) -+ /* -+ * Worst case we have a lost orphan inode wasting space, so a -+ * simple error message is OK here. 
-+ */ -+ ubifs_err("can't delete inode %lu, error %d", -+ inode->i_ino, err); -+ -+out: -+ if (ui->dirty) -+ ubifs_release_dirty_inode_budget(c, ui); -+ clear_inode(inode); -+} -+ -+static void ubifs_dirty_inode(struct inode *inode) -+{ -+ struct ubifs_inode *ui = ubifs_inode(inode); -+ -+ ubifs_assert(mutex_is_locked(&ui->ui_mutex)); -+ if (!ui->dirty) { -+ ui->dirty = 1; -+ dbg_gen("inode %lu", inode->i_ino); -+ } -+} -+ -+static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf) -+{ -+ struct ubifs_info *c = dentry->d_sb->s_fs_info; -+ unsigned long long free; -+ __le32 *uuid = (__le32 *)c->uuid; -+ -+ free = ubifs_get_free_space(c); -+ dbg_gen("free space %lld bytes (%lld blocks)", -+ free, free >> UBIFS_BLOCK_SHIFT); -+ -+ buf->f_type = UBIFS_SUPER_MAGIC; -+ buf->f_bsize = UBIFS_BLOCK_SIZE; -+ buf->f_blocks = c->block_cnt; -+ buf->f_bfree = free >> UBIFS_BLOCK_SHIFT; -+ if (free > c->report_rp_size) -+ buf->f_bavail = (free - c->report_rp_size) >> UBIFS_BLOCK_SHIFT; -+ else -+ buf->f_bavail = 0; -+ buf->f_files = 0; -+ buf->f_ffree = 0; -+ buf->f_namelen = UBIFS_MAX_NLEN; -+ buf->f_fsid.val[0] = le32_to_cpu(uuid[0]) ^ le32_to_cpu(uuid[2]); -+ buf->f_fsid.val[1] = le32_to_cpu(uuid[1]) ^ le32_to_cpu(uuid[3]); -+ ubifs_assert(buf->f_bfree <= c->block_cnt); -+ return 0; -+} -+ -+static int ubifs_show_options(struct seq_file *s, struct vfsmount *mnt) -+{ -+ struct ubifs_info *c = mnt->mnt_sb->s_fs_info; -+ -+ if (c->mount_opts.unmount_mode == 2) -+ seq_printf(s, ",fast_unmount"); -+ else if (c->mount_opts.unmount_mode == 1) -+ seq_printf(s, ",norm_unmount"); -+ -+ if (c->mount_opts.bulk_read == 2) -+ seq_printf(s, ",bulk_read"); -+ else if (c->mount_opts.bulk_read == 1) -+ seq_printf(s, ",no_bulk_read"); -+ -+ if (c->mount_opts.chk_data_crc == 2) -+ seq_printf(s, ",chk_data_crc"); -+ else if (c->mount_opts.chk_data_crc == 1) -+ seq_printf(s, ",no_chk_data_crc"); -+ -+ if (c->mount_opts.override_compr) { -+ seq_printf(s, ",compr="); -+ seq_printf(s, 
ubifs_compr_name(c->mount_opts.compr_type)); -+ } -+ -+ return 0; -+} -+ -+static int ubifs_sync_fs(struct super_block *sb, int wait) -+{ -+ int i, err; -+ struct ubifs_info *c = sb->s_fs_info; -+ struct writeback_control wbc = { -+ .sync_mode = WB_SYNC_ALL, -+ .range_start = 0, -+ .range_end = LLONG_MAX, -+ .nr_to_write = LONG_MAX, -+ }; -+ -+ /* -+ * Zero @wait is just an advisory thing to help the file system shove -+ * lots of data into the queues, and there will be the second -+ * '->sync_fs()' call, with non-zero @wait. -+ */ -+ if (!wait) -+ return 0; -+ -+ if (sb->s_flags & MS_RDONLY) -+ return 0; -+ -+ /* -+ * VFS calls '->sync_fs()' before synchronizing all dirty inodes and -+ * pages, so synchronize them first, then commit the journal. Strictly -+ * speaking, it is not necessary to commit the journal here, -+ * synchronizing write-buffers would be enough. But committing makes -+ * UBIFS free space predictions much more accurate, so we want to let -+ * the user be able to get more accurate results of 'statfs()' after -+ * they synchronize the file system. -+ */ -+ generic_sync_sb_inodes(sb, &wbc); -+ -+ /* -+ * Synchronize write buffers, because 'ubifs_run_commit()' does not -+ * do this if it waits for an already running commit. -+ */ -+ for (i = 0; i < c->jhead_cnt; i++) { -+ err = ubifs_wbuf_sync(&c->jheads[i].wbuf); -+ if (err) -+ return err; -+ } -+ -+ err = ubifs_run_commit(c); -+ if (err) -+ return err; -+ -+ return ubi_sync(c->vi.ubi_num); -+} -+ -+/** -+ * init_constants_early - initialize UBIFS constants. -+ * @c: UBIFS file-system description object -+ * -+ * This function initialize UBIFS constants which do not need the superblock to -+ * be read. It also checks that the UBI volume satisfies basic UBIFS -+ * requirements. Returns zero in case of success and a negative error code in -+ * case of failure. 
-+ */ -+static int init_constants_early(struct ubifs_info *c) -+{ -+ if (c->vi.corrupted) { -+ ubifs_warn("UBI volume is corrupted - read-only mode"); -+ c->ro_media = 1; -+ } -+ -+ if (c->di.ro_mode) { -+ ubifs_msg("read-only UBI device"); -+ c->ro_media = 1; -+ } -+ -+ if (c->vi.vol_type == UBI_STATIC_VOLUME) { -+ ubifs_msg("static UBI volume - read-only mode"); -+ c->ro_media = 1; -+ } -+ -+ c->leb_cnt = c->vi.size; -+ c->leb_size = c->vi.usable_leb_size; -+ c->half_leb_size = c->leb_size / 2; -+ c->min_io_size = c->di.min_io_size; -+ c->min_io_shift = fls(c->min_io_size) - 1; -+ -+ if (c->leb_size < UBIFS_MIN_LEB_SZ) { -+ ubifs_err("too small LEBs (%d bytes), min. is %d bytes", -+ c->leb_size, UBIFS_MIN_LEB_SZ); -+ return -EINVAL; -+ } -+ -+ if (c->leb_cnt < UBIFS_MIN_LEB_CNT) { -+ ubifs_err("too few LEBs (%d), min. is %d", -+ c->leb_cnt, UBIFS_MIN_LEB_CNT); -+ return -EINVAL; -+ } -+ -+ if (!is_power_of_2(c->min_io_size)) { -+ ubifs_err("bad min. I/O size %d", c->min_io_size); -+ return -EINVAL; -+ } -+ -+ /* -+ * UBIFS aligns all node to 8-byte boundary, so to make function in -+ * io.c simpler, assume minimum I/O unit size to be 8 bytes if it is -+ * less than 8. -+ */ -+ if (c->min_io_size < 8) { -+ c->min_io_size = 8; -+ c->min_io_shift = 3; -+ } -+ -+ c->ref_node_alsz = ALIGN(UBIFS_REF_NODE_SZ, c->min_io_size); -+ c->mst_node_alsz = ALIGN(UBIFS_MST_NODE_SZ, c->min_io_size); -+ -+ /* -+ * Initialize node length ranges which are mostly needed for node -+ * length validation. 
-+ */ -+ c->ranges[UBIFS_PAD_NODE].len = UBIFS_PAD_NODE_SZ; -+ c->ranges[UBIFS_SB_NODE].len = UBIFS_SB_NODE_SZ; -+ c->ranges[UBIFS_MST_NODE].len = UBIFS_MST_NODE_SZ; -+ c->ranges[UBIFS_REF_NODE].len = UBIFS_REF_NODE_SZ; -+ c->ranges[UBIFS_TRUN_NODE].len = UBIFS_TRUN_NODE_SZ; -+ c->ranges[UBIFS_CS_NODE].len = UBIFS_CS_NODE_SZ; -+ -+ c->ranges[UBIFS_INO_NODE].min_len = UBIFS_INO_NODE_SZ; -+ c->ranges[UBIFS_INO_NODE].max_len = UBIFS_MAX_INO_NODE_SZ; -+ c->ranges[UBIFS_ORPH_NODE].min_len = -+ UBIFS_ORPH_NODE_SZ + sizeof(__le64); -+ c->ranges[UBIFS_ORPH_NODE].max_len = c->leb_size; -+ c->ranges[UBIFS_DENT_NODE].min_len = UBIFS_DENT_NODE_SZ; -+ c->ranges[UBIFS_DENT_NODE].max_len = UBIFS_MAX_DENT_NODE_SZ; -+ c->ranges[UBIFS_XENT_NODE].min_len = UBIFS_XENT_NODE_SZ; -+ c->ranges[UBIFS_XENT_NODE].max_len = UBIFS_MAX_XENT_NODE_SZ; -+ c->ranges[UBIFS_DATA_NODE].min_len = UBIFS_DATA_NODE_SZ; -+ c->ranges[UBIFS_DATA_NODE].max_len = UBIFS_MAX_DATA_NODE_SZ; -+ /* -+ * Minimum indexing node size is amended later when superblock is -+ * read and the key length is known. -+ */ -+ c->ranges[UBIFS_IDX_NODE].min_len = UBIFS_IDX_NODE_SZ + UBIFS_BRANCH_SZ; -+ /* -+ * Maximum indexing node size is amended later when superblock is -+ * read and the fanout is known. -+ */ -+ c->ranges[UBIFS_IDX_NODE].max_len = INT_MAX; -+ -+ /* -+ * Initialize dead and dark LEB space watermarks. See gc.c for comments -+ * about these values. -+ */ -+ c->dead_wm = ALIGN(MIN_WRITE_SZ, c->min_io_size); -+ c->dark_wm = ALIGN(UBIFS_MAX_NODE_SZ, c->min_io_size); -+ -+ /* -+ * Calculate how many bytes would be wasted at the end of LEB if it was -+ * fully filled with data nodes of maximum size. This is used in -+ * calculations when reporting free space. 
-+ */ -+ c->leb_overhead = c->leb_size % UBIFS_MAX_DATA_NODE_SZ; -+ -+ /* Buffer size for bulk-reads */ -+ c->max_bu_buf_len = UBIFS_MAX_BULK_READ * UBIFS_MAX_DATA_NODE_SZ; -+ if (c->max_bu_buf_len > c->leb_size) -+ c->max_bu_buf_len = c->leb_size; -+ return 0; -+} -+ -+/** -+ * bud_wbuf_callback - bud LEB write-buffer synchronization call-back. -+ * @c: UBIFS file-system description object -+ * @lnum: LEB the write-buffer was synchronized to -+ * @free: how many free bytes left in this LEB -+ * @pad: how many bytes were padded -+ * -+ * This is a callback function which is called by the I/O unit when the -+ * write-buffer is synchronized. We need this to correctly maintain space -+ * accounting in bud logical eraseblocks. This function returns zero in case of -+ * success and a negative error code in case of failure. -+ * -+ * This function actually belongs to the journal, but we keep it here because -+ * we want to keep it static. -+ */ -+static int bud_wbuf_callback(struct ubifs_info *c, int lnum, int free, int pad) -+{ -+ return ubifs_update_one_lp(c, lnum, free, pad, 0, 0); -+} -+ -+/* -+ * init_constants_sb - initialize UBIFS constants. -+ * @c: UBIFS file-system description object -+ * -+ * This is a helper function which initializes various UBIFS constants after -+ * the superblock has been read. It also checks various UBIFS parameters and -+ * makes sure they are all right. Returns zero in case of success and a -+ * negative error code in case of failure. 
-+ */ -+static int init_constants_sb(struct ubifs_info *c) -+{ -+ int tmp, err; -+ long long tmp64; -+ -+ c->main_bytes = (long long)c->main_lebs * c->leb_size; -+ c->max_znode_sz = sizeof(struct ubifs_znode) + -+ c->fanout * sizeof(struct ubifs_zbranch); -+ -+ tmp = ubifs_idx_node_sz(c, 1); -+ c->ranges[UBIFS_IDX_NODE].min_len = tmp; -+ c->min_idx_node_sz = ALIGN(tmp, 8); -+ -+ tmp = ubifs_idx_node_sz(c, c->fanout); -+ c->ranges[UBIFS_IDX_NODE].max_len = tmp; -+ c->max_idx_node_sz = ALIGN(tmp, 8); -+ -+ /* Make sure LEB size is large enough to fit full commit */ -+ tmp = UBIFS_CS_NODE_SZ + UBIFS_REF_NODE_SZ * c->jhead_cnt; -+ tmp = ALIGN(tmp, c->min_io_size); -+ if (tmp > c->leb_size) { -+ dbg_err("too small LEB size %d, at least %d needed", -+ c->leb_size, tmp); -+ return -EINVAL; -+ } -+ -+ /* -+ * Make sure that the log is large enough to fit reference nodes for -+ * all buds plus one reserved LEB. -+ */ -+ tmp64 = c->max_bud_bytes + c->leb_size - 1; -+ c->max_bud_cnt = div_u64(tmp64, c->leb_size); -+ tmp = (c->ref_node_alsz * c->max_bud_cnt + c->leb_size - 1); -+ tmp /= c->leb_size; -+ tmp += 1; -+ if (c->log_lebs < tmp) { -+ dbg_err("too small log %d LEBs, required min. %d LEBs", -+ c->log_lebs, tmp); -+ return -EINVAL; -+ } -+ -+ /* -+ * When budgeting we assume worst-case scenarios when the pages are not -+ * be compressed and direntries are of the maximum size. -+ * -+ * Note, data, which may be stored in inodes is budgeted separately, so -+ * it is not included into 'c->inode_budget'. -+ */ -+ c->page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE; -+ c->inode_budget = UBIFS_INO_NODE_SZ; -+ c->dent_budget = UBIFS_MAX_DENT_NODE_SZ; -+ -+ /* -+ * When the amount of flash space used by buds becomes -+ * 'c->max_bud_bytes', UBIFS just blocks all writers and starts commit. -+ * The writers are unblocked when the commit is finished. 
To avoid -+ * writers to be blocked UBIFS initiates background commit in advance, -+ * when number of bud bytes becomes above the limit defined below. -+ */ -+ c->bg_bud_bytes = (c->max_bud_bytes * 13) >> 4; -+ -+ /* -+ * Ensure minimum journal size. All the bytes in the journal heads are -+ * considered to be used, when calculating the current journal usage. -+ * Consequently, if the journal is too small, UBIFS will treat it as -+ * always full. -+ */ -+ tmp64 = (long long)(c->jhead_cnt + 1) * c->leb_size + 1; -+ if (c->bg_bud_bytes < tmp64) -+ c->bg_bud_bytes = tmp64; -+ if (c->max_bud_bytes < tmp64 + c->leb_size) -+ c->max_bud_bytes = tmp64 + c->leb_size; -+ -+ err = ubifs_calc_lpt_geom(c); -+ if (err) -+ return err; -+ -+ return 0; -+} -+ -+/* -+ * init_constants_master - initialize UBIFS constants. -+ * @c: UBIFS file-system description object -+ * -+ * This is a helper function which initializes various UBIFS constants after -+ * the master node has been read. It also checks various UBIFS parameters and -+ * makes sure they are all right. -+ */ -+static void init_constants_master(struct ubifs_info *c) -+{ -+ long long tmp64; -+ -+ c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); -+ -+ /* -+ * Calculate total amount of FS blocks. This number is not used -+ * internally because it does not make much sense for UBIFS, but it is -+ * necessary to report something for the 'statfs()' call. -+ * -+ * Subtract the LEB reserved for GC, the LEB which is reserved for -+ * deletions, minimum LEBs for the index, and assume only one journal -+ * head is available. -+ */ -+ tmp64 = c->main_lebs - 1 - 1 - MIN_INDEX_LEBS - c->jhead_cnt + 1; -+ tmp64 *= (long long)c->leb_size - c->leb_overhead; -+ tmp64 = ubifs_reported_space(c, tmp64); -+ c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT; -+} -+ -+/** -+ * take_gc_lnum - reserve GC LEB. 
-+ * @c: UBIFS file-system description object -+ * -+ * This function ensures that the LEB reserved for garbage collection is marked -+ * as "taken" in lprops. We also have to set free space to LEB size and dirty -+ * space to zero, because lprops may contain out-of-date information if the -+ * file-system was un-mounted before it has been committed. This function -+ * returns zero in case of success and a negative error code in case of -+ * failure. -+ */ -+static int take_gc_lnum(struct ubifs_info *c) -+{ -+ int err; -+ -+ if (c->gc_lnum == -1) { -+ ubifs_err("no LEB for GC"); -+ return -EINVAL; -+ } -+ -+ /* And we have to tell lprops that this LEB is taken */ -+ err = ubifs_change_one_lp(c, c->gc_lnum, c->leb_size, 0, -+ LPROPS_TAKEN, 0, 0); -+ return err; -+} -+ -+/** -+ * alloc_wbufs - allocate write-buffers. -+ * @c: UBIFS file-system description object -+ * -+ * This helper function allocates and initializes UBIFS write-buffers. Returns -+ * zero in case of success and %-ENOMEM in case of failure. -+ */ -+static int alloc_wbufs(struct ubifs_info *c) -+{ -+ int i, err; -+ -+ c->jheads = kzalloc(c->jhead_cnt * sizeof(struct ubifs_jhead), -+ GFP_KERNEL); -+ if (!c->jheads) -+ return -ENOMEM; -+ -+ /* Initialize journal heads */ -+ for (i = 0; i < c->jhead_cnt; i++) { -+ INIT_LIST_HEAD(&c->jheads[i].buds_list); -+ err = ubifs_wbuf_init(c, &c->jheads[i].wbuf); -+ if (err) -+ return err; -+ -+ c->jheads[i].wbuf.sync_callback = &bud_wbuf_callback; -+ c->jheads[i].wbuf.jhead = i; -+ } -+ -+ c->jheads[BASEHD].wbuf.dtype = UBI_SHORTTERM; -+ /* -+ * Garbage Collector head likely contains long-term data and -+ * does not need to be synchronized by timer. -+ */ -+ c->jheads[GCHD].wbuf.dtype = UBI_LONGTERM; -+ c->jheads[GCHD].wbuf.timeout = 0; -+ -+ return 0; -+} -+ -+/** -+ * free_wbufs - free write-buffers. 
-+ * @c: UBIFS file-system description object -+ */ -+static void free_wbufs(struct ubifs_info *c) -+{ -+ int i; -+ -+ if (c->jheads) { -+ for (i = 0; i < c->jhead_cnt; i++) { -+ kfree(c->jheads[i].wbuf.buf); -+ kfree(c->jheads[i].wbuf.inodes); -+ } -+ kfree(c->jheads); -+ c->jheads = NULL; -+ } -+} -+ -+/** -+ * free_orphans - free orphans. -+ * @c: UBIFS file-system description object -+ */ -+static void free_orphans(struct ubifs_info *c) -+{ -+ struct ubifs_orphan *orph; -+ -+ while (c->orph_dnext) { -+ orph = c->orph_dnext; -+ c->orph_dnext = orph->dnext; -+ list_del(&orph->list); -+ kfree(orph); -+ } -+ -+ while (!list_empty(&c->orph_list)) { -+ orph = list_entry(c->orph_list.next, struct ubifs_orphan, list); -+ list_del(&orph->list); -+ kfree(orph); -+ dbg_err("orphan list not empty at unmount"); -+ } -+ -+ vfree(c->orph_buf); -+ c->orph_buf = NULL; -+} -+ -+/** -+ * free_buds - free per-bud objects. -+ * @c: UBIFS file-system description object -+ */ -+static void free_buds(struct ubifs_info *c) -+{ -+ struct rb_node *this = c->buds.rb_node; -+ struct ubifs_bud *bud; -+ -+ while (this) { -+ if (this->rb_left) -+ this = this->rb_left; -+ else if (this->rb_right) -+ this = this->rb_right; -+ else { -+ bud = rb_entry(this, struct ubifs_bud, rb); -+ this = rb_parent(this); -+ if (this) { -+ if (this->rb_left == &bud->rb) -+ this->rb_left = NULL; -+ else -+ this->rb_right = NULL; -+ } -+ kfree(bud); -+ } -+ } -+} -+ -+/** -+ * check_volume_empty - check if the UBI volume is empty. -+ * @c: UBIFS file-system description object -+ * -+ * This function checks if the UBIFS volume is empty by looking if its LEBs are -+ * mapped or not. The result of checking is stored in the @c->empty variable. -+ * Returns zero in case of success and a negative error code in case of -+ * failure. 
-+ */ -+static int check_volume_empty(struct ubifs_info *c) -+{ -+ int lnum, err; -+ -+ c->empty = 1; -+ for (lnum = 0; lnum < c->leb_cnt; lnum++) { -+ err = ubi_is_mapped(c->ubi, lnum); -+ if (unlikely(err < 0)) -+ return err; -+ if (err == 1) { -+ c->empty = 0; -+ break; -+ } -+ -+ cond_resched(); -+ } -+ -+ return 0; -+} -+ -+/* -+ * UBIFS mount options. -+ * -+ * Opt_fast_unmount: do not run a journal commit before un-mounting -+ * Opt_norm_unmount: run a journal commit before un-mounting -+ * Opt_bulk_read: enable bulk-reads -+ * Opt_no_bulk_read: disable bulk-reads -+ * Opt_chk_data_crc: check CRCs when reading data nodes -+ * Opt_no_chk_data_crc: do not check CRCs when reading data nodes -+ * Opt_override_compr: override default compressor -+ * Opt_err: just end of array marker -+ */ -+enum { -+ Opt_fast_unmount, -+ Opt_norm_unmount, -+ Opt_bulk_read, -+ Opt_no_bulk_read, -+ Opt_chk_data_crc, -+ Opt_no_chk_data_crc, -+ Opt_override_compr, -+ Opt_err, -+}; -+ -+static match_table_t tokens = { -+ {Opt_fast_unmount, "fast_unmount"}, -+ {Opt_norm_unmount, "norm_unmount"}, -+ {Opt_bulk_read, "bulk_read"}, -+ {Opt_no_bulk_read, "no_bulk_read"}, -+ {Opt_chk_data_crc, "chk_data_crc"}, -+ {Opt_no_chk_data_crc, "no_chk_data_crc"}, -+ {Opt_override_compr, "compr=%s"}, -+ {Opt_err, NULL}, -+}; -+ -+/** -+ * ubifs_parse_options - parse mount parameters. -+ * @c: UBIFS file-system description object -+ * @options: parameters to parse -+ * @is_remount: non-zero if this is FS re-mount -+ * -+ * This function parses UBIFS mount options and returns zero in case success -+ * and a negative error code in case of failure. 
-+ */ -+static int ubifs_parse_options(struct ubifs_info *c, char *options, -+ int is_remount) -+{ -+ char *p; -+ substring_t args[MAX_OPT_ARGS]; -+ -+ if (!options) -+ return 0; -+ -+ while ((p = strsep(&options, ","))) { -+ int token; -+ -+ if (!*p) -+ continue; -+ -+ token = match_token(p, tokens, args); -+ switch (token) { -+ /* -+ * %Opt_fast_unmount and %Opt_norm_unmount options are ignored. -+ * We accepte them in order to be backware-compatible. But this -+ * should be removed at some point. -+ */ -+ case Opt_fast_unmount: -+ c->mount_opts.unmount_mode = 2; -+ break; -+ case Opt_norm_unmount: -+ c->mount_opts.unmount_mode = 1; -+ break; -+ case Opt_bulk_read: -+ c->mount_opts.bulk_read = 2; -+ c->bulk_read = 1; -+ break; -+ case Opt_no_bulk_read: -+ c->mount_opts.bulk_read = 1; -+ c->bulk_read = 0; -+ break; -+ case Opt_chk_data_crc: -+ c->mount_opts.chk_data_crc = 2; -+ c->no_chk_data_crc = 0; -+ break; -+ case Opt_no_chk_data_crc: -+ c->mount_opts.chk_data_crc = 1; -+ c->no_chk_data_crc = 1; -+ break; -+ case Opt_override_compr: -+ { -+ char *name = match_strdup(&args[0]); -+ -+ if (!name) -+ return -ENOMEM; -+ if (!strcmp(name, "none")) -+ c->mount_opts.compr_type = UBIFS_COMPR_NONE; -+ else if (!strcmp(name, "lzo")) -+ c->mount_opts.compr_type = UBIFS_COMPR_LZO; -+ else if (!strcmp(name, "zlib")) -+ c->mount_opts.compr_type = UBIFS_COMPR_ZLIB; -+ else { -+ ubifs_err("unknown compressor \"%s\"", name); -+ kfree(name); -+ return -EINVAL; -+ } -+ kfree(name); -+ c->mount_opts.override_compr = 1; -+ c->default_compr = c->mount_opts.compr_type; -+ break; -+ } -+ default: -+ ubifs_err("unrecognized mount option \"%s\" " -+ "or missing value", p); -+ return -EINVAL; -+ } -+ } -+ -+ return 0; -+} -+ -+/** -+ * destroy_journal - destroy journal data structures. -+ * @c: UBIFS file-system description object -+ * -+ * This function destroys journal data structures including those that may have -+ * been created by recovery functions. 
-+ */ -+static void destroy_journal(struct ubifs_info *c) -+{ -+ while (!list_empty(&c->unclean_leb_list)) { -+ struct ubifs_unclean_leb *ucleb; -+ -+ ucleb = list_entry(c->unclean_leb_list.next, -+ struct ubifs_unclean_leb, list); -+ list_del(&ucleb->list); -+ kfree(ucleb); -+ } -+ while (!list_empty(&c->old_buds)) { -+ struct ubifs_bud *bud; -+ -+ bud = list_entry(c->old_buds.next, struct ubifs_bud, list); -+ list_del(&bud->list); -+ kfree(bud); -+ } -+ ubifs_destroy_idx_gc(c); -+ ubifs_destroy_size_tree(c); -+ ubifs_tnc_close(c); -+ free_buds(c); -+} -+ -+/** -+ * bu_init - initialize bulk-read information. -+ * @c: UBIFS file-system description object -+ */ -+static void bu_init(struct ubifs_info *c) -+{ -+ ubifs_assert(c->bulk_read == 1); -+ -+ if (c->bu.buf) -+ return; /* Already initialized */ -+ -+again: -+ c->bu.buf = kmalloc(c->max_bu_buf_len, GFP_KERNEL | __GFP_NOWARN); -+ if (!c->bu.buf) { -+ if (c->max_bu_buf_len > UBIFS_KMALLOC_OK) { -+ c->max_bu_buf_len = UBIFS_KMALLOC_OK; -+ goto again; -+ } -+ -+ /* Just disable bulk-read */ -+ ubifs_warn("Cannot allocate %d bytes of memory for bulk-read, " -+ "disabling it", c->max_bu_buf_len); -+ c->mount_opts.bulk_read = 1; -+ c->bulk_read = 0; -+ return; -+ } -+} -+ -+/** -+ * check_free_space - check if there is enough free space to mount. -+ * @c: UBIFS file-system description object -+ * -+ * This function makes sure UBIFS has enough free space to be mounted in -+ * read/write mode. UBIFS must always have some free space to allow deletions. -+ */ -+static int check_free_space(struct ubifs_info *c) -+{ -+ ubifs_assert(c->dark_wm > 0); -+ if (c->lst.total_free + c->lst.total_dirty < c->dark_wm) { -+ ubifs_err("insufficient free space to mount in read/write mode"); -+ dbg_dump_budg(c); -+ dbg_dump_lprops(c); -+ return -ENOSPC; -+ } -+ return 0; -+} -+ -+/** -+ * mount_ubifs - mount UBIFS file-system. -+ * @c: UBIFS file-system description object -+ * -+ * This function mounts UBIFS file system. 
Returns zero in case of success and -+ * a negative error code in case of failure. -+ * -+ * Note, the function does not de-allocate resources it it fails half way -+ * through, and the caller has to do this instead. -+ */ -+static int mount_ubifs(struct ubifs_info *c) -+{ -+ struct super_block *sb = c->vfs_sb; -+ int err, mounted_read_only = (sb->s_flags & MS_RDONLY); -+ long long x; -+ size_t sz; -+ -+ err = init_constants_early(c); -+ if (err) -+ return err; -+ -+ err = ubifs_debugging_init(c); -+ if (err) -+ return err; -+ -+ err = check_volume_empty(c); -+ if (err) -+ goto out_free; -+ -+ if (c->empty && (mounted_read_only || c->ro_media)) { -+ /* -+ * This UBI volume is empty, and read-only, or the file system -+ * is mounted read-only - we cannot format it. -+ */ -+ ubifs_err("can't format empty UBI volume: read-only %s", -+ c->ro_media ? "UBI volume" : "mount"); -+ err = -EROFS; -+ goto out_free; -+ } -+ -+ if (c->ro_media && !mounted_read_only) { -+ ubifs_err("cannot mount read-write - read-only media"); -+ err = -EROFS; -+ goto out_free; -+ } -+ -+ /* -+ * The requirement for the buffer is that it should fit indexing B-tree -+ * height amount of integers. We assume the height if the TNC tree will -+ * never exceed 64. -+ */ -+ err = -ENOMEM; -+ c->bottom_up_buf = kmalloc(BOTTOM_UP_HEIGHT * sizeof(int), GFP_KERNEL); -+ if (!c->bottom_up_buf) -+ goto out_free; -+ -+ c->sbuf = vmalloc(c->leb_size); -+ if (!c->sbuf) -+ goto out_free; -+ -+ if (!mounted_read_only) { -+ c->ileb_buf = vmalloc(c->leb_size); -+ if (!c->ileb_buf) -+ goto out_free; -+ } -+ -+ if (c->bulk_read == 1) -+ bu_init(c); -+ -+ /* -+ * We have to check all CRCs, even for data nodes, when we mount the FS -+ * (specifically, when we are replaying). -+ */ -+ c->always_chk_crc = 1; -+ -+ err = ubifs_read_superblock(c); -+ if (err) -+ goto out_free; -+ -+ /* -+ * Make sure the compressor which is set as default in the superblock -+ * or overridden by mount options is actually compiled in. 
-+ */ -+ if (!ubifs_compr_present(c->default_compr)) { -+ ubifs_err("'compressor \"%s\" is not compiled in", -+ ubifs_compr_name(c->default_compr)); -+ goto out_free; -+ } -+ -+ err = init_constants_sb(c); -+ if (err) -+ goto out_free; -+ -+ sz = ALIGN(c->max_idx_node_sz, c->min_io_size); -+ sz = ALIGN(sz + c->max_idx_node_sz, c->min_io_size); -+ c->cbuf = kmalloc(sz, GFP_NOFS); -+ if (!c->cbuf) { -+ err = -ENOMEM; -+ goto out_free; -+ } -+ -+ sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id); -+ if (!mounted_read_only) { -+ err = alloc_wbufs(c); -+ if (err) -+ goto out_cbuf; -+ -+ /* Create background thread */ -+ c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name); -+ if (IS_ERR(c->bgt)) { -+ err = PTR_ERR(c->bgt); -+ c->bgt = NULL; -+ ubifs_err("cannot spawn \"%s\", error %d", -+ c->bgt_name, err); -+ goto out_wbufs; -+ } -+ wake_up_process(c->bgt); -+ } -+ -+ err = ubifs_read_master(c); -+ if (err) -+ goto out_master; -+ -+ init_constants_master(c); -+ -+ if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) { -+ ubifs_msg("recovery needed"); -+ c->need_recovery = 1; -+ if (!mounted_read_only) { -+ err = ubifs_recover_inl_heads(c, c->sbuf); -+ if (err) -+ goto out_master; -+ } -+ } else if (!mounted_read_only) { -+ /* -+ * Set the "dirty" flag so that if we reboot uncleanly we -+ * will notice this immediately on the next mount. 
-+ */ -+ c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY); -+ err = ubifs_write_master(c); -+ if (err) -+ goto out_master; -+ } -+ -+ err = ubifs_lpt_init(c, 1, !mounted_read_only); -+ if (err) -+ goto out_lpt; -+ -+ err = dbg_check_idx_size(c, c->old_idx_sz); -+ if (err) -+ goto out_lpt; -+ -+ err = ubifs_replay_journal(c); -+ if (err) -+ goto out_journal; -+ -+ err = ubifs_mount_orphans(c, c->need_recovery, mounted_read_only); -+ if (err) -+ goto out_orphans; -+ -+ if (!mounted_read_only) { -+ int lnum; -+ -+ err = check_free_space(c); -+ if (err) -+ goto out_orphans; -+ -+ /* Check for enough log space */ -+ lnum = c->lhead_lnum + 1; -+ if (lnum >= UBIFS_LOG_LNUM + c->log_lebs) -+ lnum = UBIFS_LOG_LNUM; -+ if (lnum == c->ltail_lnum) { -+ err = ubifs_consolidate_log(c); -+ if (err) -+ goto out_orphans; -+ } -+ -+ if (c->need_recovery) { -+ err = ubifs_recover_size(c); -+ if (err) -+ goto out_orphans; -+ err = ubifs_rcvry_gc_commit(c); -+ } else { -+ err = take_gc_lnum(c); -+ if (err) -+ goto out_orphans; -+ -+ /* -+ * GC LEB may contain garbage if there was an unclean -+ * reboot, and it should be un-mapped. -+ */ -+ err = ubifs_leb_unmap(c, c->gc_lnum); -+ if (err) -+ return err; -+ } -+ -+ err = dbg_check_lprops(c); -+ if (err) -+ goto out_orphans; -+ } else if (c->need_recovery) { -+ err = ubifs_recover_size(c); -+ if (err) -+ goto out_orphans; -+ } else { -+ /* -+ * Even if we mount read-only, we have to set space in GC LEB -+ * to proper value because this affects UBIFS free space -+ * reporting. We do not want to have a situation when -+ * re-mounting from R/O to R/W changes amount of free space. 
-+ */ -+ err = take_gc_lnum(c); -+ if (err) -+ goto out_orphans; -+ } -+ -+ spin_lock(&ubifs_infos_lock); -+ list_add_tail(&c->infos_list, &ubifs_infos); -+ spin_unlock(&ubifs_infos_lock); -+ -+ if (c->need_recovery) { -+ if (mounted_read_only) -+ ubifs_msg("recovery deferred"); -+ else { -+ c->need_recovery = 0; -+ ubifs_msg("recovery completed"); -+ /* GC LEB has to be empty and taken at this point */ -+ ubifs_assert(c->lst.taken_empty_lebs == 1); -+ } -+ } else -+ ubifs_assert(c->lst.taken_empty_lebs == 1); -+ -+ err = dbg_check_filesystem(c); -+ if (err) -+ goto out_infos; -+ -+ err = dbg_debugfs_init_fs(c); -+ if (err) -+ goto out_infos; -+ -+ c->always_chk_crc = 0; -+ -+ ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"", -+ c->vi.ubi_num, c->vi.vol_id, c->vi.name); -+ if (mounted_read_only) -+ ubifs_msg("mounted read-only"); -+ x = (long long)c->main_lebs * c->leb_size; -+ ubifs_msg("file system size: %lld bytes (%lld KiB, %lld MiB, %d " -+ "LEBs)", x, x >> 10, x >> 20, c->main_lebs); -+ x = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes; -+ ubifs_msg("journal size: %lld bytes (%lld KiB, %lld MiB, %d " -+ "LEBs)", x, x >> 10, x >> 20, c->log_lebs + c->max_bud_cnt); -+ ubifs_msg("media format: %d (latest is %d)", -+ c->fmt_version, UBIFS_FORMAT_VERSION); -+ ubifs_msg("default compressor: %s", ubifs_compr_name(c->default_compr)); -+ ubifs_msg("reserved for root: %llu bytes (%llu KiB)", -+ c->report_rp_size, c->report_rp_size >> 10); -+ -+ dbg_msg("compiled on: " __DATE__ " at " __TIME__); -+ dbg_msg("min. 
I/O unit size: %d bytes", c->min_io_size); -+ dbg_msg("LEB size: %d bytes (%d KiB)", -+ c->leb_size, c->leb_size >> 10); -+ dbg_msg("data journal heads: %d", -+ c->jhead_cnt - NONDATA_JHEADS_CNT); -+ dbg_msg("UUID: %02X%02X%02X%02X-%02X%02X" -+ "-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X", -+ c->uuid[0], c->uuid[1], c->uuid[2], c->uuid[3], -+ c->uuid[4], c->uuid[5], c->uuid[6], c->uuid[7], -+ c->uuid[8], c->uuid[9], c->uuid[10], c->uuid[11], -+ c->uuid[12], c->uuid[13], c->uuid[14], c->uuid[15]); -+ dbg_msg("big_lpt %d", c->big_lpt); -+ dbg_msg("log LEBs: %d (%d - %d)", -+ c->log_lebs, UBIFS_LOG_LNUM, c->log_last); -+ dbg_msg("LPT area LEBs: %d (%d - %d)", -+ c->lpt_lebs, c->lpt_first, c->lpt_last); -+ dbg_msg("orphan area LEBs: %d (%d - %d)", -+ c->orph_lebs, c->orph_first, c->orph_last); -+ dbg_msg("main area LEBs: %d (%d - %d)", -+ c->main_lebs, c->main_first, c->leb_cnt - 1); -+ dbg_msg("index LEBs: %d", c->lst.idx_lebs); -+ dbg_msg("total index bytes: %lld (%lld KiB, %lld MiB)", -+ c->old_idx_sz, c->old_idx_sz >> 10, c->old_idx_sz >> 20); -+ dbg_msg("key hash type: %d", c->key_hash_type); -+ dbg_msg("tree fanout: %d", c->fanout); -+ dbg_msg("reserved GC LEB: %d", c->gc_lnum); -+ dbg_msg("first main LEB: %d", c->main_first); -+ dbg_msg("max. znode size %d", c->max_znode_sz); -+ dbg_msg("max. index node size %d", c->max_idx_node_sz); -+ dbg_msg("node sizes: data %zu, inode %zu, dentry %zu", -+ UBIFS_DATA_NODE_SZ, UBIFS_INO_NODE_SZ, UBIFS_DENT_NODE_SZ); -+ dbg_msg("node sizes: trun %zu, sb %zu, master %zu", -+ UBIFS_TRUN_NODE_SZ, UBIFS_SB_NODE_SZ, UBIFS_MST_NODE_SZ); -+ dbg_msg("node sizes: ref %zu, cmt. start %zu, orph %zu", -+ UBIFS_REF_NODE_SZ, UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ); -+ dbg_msg("max. 
node sizes: data %zu, inode %zu dentry %zu", -+ UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ, -+ UBIFS_MAX_DENT_NODE_SZ); -+ dbg_msg("dead watermark: %d", c->dead_wm); -+ dbg_msg("dark watermark: %d", c->dark_wm); -+ dbg_msg("LEB overhead: %d", c->leb_overhead); -+ x = (long long)c->main_lebs * c->dark_wm; -+ dbg_msg("max. dark space: %lld (%lld KiB, %lld MiB)", -+ x, x >> 10, x >> 20); -+ dbg_msg("maximum bud bytes: %lld (%lld KiB, %lld MiB)", -+ c->max_bud_bytes, c->max_bud_bytes >> 10, -+ c->max_bud_bytes >> 20); -+ dbg_msg("BG commit bud bytes: %lld (%lld KiB, %lld MiB)", -+ c->bg_bud_bytes, c->bg_bud_bytes >> 10, -+ c->bg_bud_bytes >> 20); -+ dbg_msg("current bud bytes %lld (%lld KiB, %lld MiB)", -+ c->bud_bytes, c->bud_bytes >> 10, c->bud_bytes >> 20); -+ dbg_msg("max. seq. number: %llu", c->max_sqnum); -+ dbg_msg("commit number: %llu", c->cmt_no); -+ -+ return 0; -+ -+out_infos: -+ spin_lock(&ubifs_infos_lock); -+ list_del(&c->infos_list); -+ spin_unlock(&ubifs_infos_lock); -+out_orphans: -+ free_orphans(c); -+out_journal: -+ destroy_journal(c); -+out_lpt: -+ ubifs_lpt_free(c, 0); -+out_master: -+ kfree(c->mst_node); -+ kfree(c->rcvrd_mst_node); -+ if (c->bgt) -+ kthread_stop(c->bgt); -+out_wbufs: -+ free_wbufs(c); -+out_cbuf: -+ kfree(c->cbuf); -+out_free: -+ kfree(c->bu.buf); -+ vfree(c->ileb_buf); -+ vfree(c->sbuf); -+ kfree(c->bottom_up_buf); -+ ubifs_debugging_exit(c); -+ return err; -+} -+ -+/** -+ * ubifs_umount - un-mount UBIFS file-system. -+ * @c: UBIFS file-system description object -+ * -+ * Note, this function is called to free allocated resourced when un-mounting, -+ * as well as free resources when an error occurred while we were half way -+ * through mounting (error path cleanup function). So it has to make sure the -+ * resource was actually allocated before freeing it. 
-+ */ -+static void ubifs_umount(struct ubifs_info *c) -+{ -+ dbg_gen("un-mounting UBI device %d, volume %d", c->vi.ubi_num, -+ c->vi.vol_id); -+ -+ dbg_debugfs_exit_fs(c); -+ spin_lock(&ubifs_infos_lock); -+ list_del(&c->infos_list); -+ spin_unlock(&ubifs_infos_lock); -+ -+ if (c->bgt) -+ kthread_stop(c->bgt); -+ -+ destroy_journal(c); -+ free_wbufs(c); -+ free_orphans(c); -+ ubifs_lpt_free(c, 0); -+ -+ kfree(c->cbuf); -+ kfree(c->rcvrd_mst_node); -+ kfree(c->mst_node); -+ kfree(c->bu.buf); -+ vfree(c->ileb_buf); -+ vfree(c->sbuf); -+ kfree(c->bottom_up_buf); -+ ubifs_debugging_exit(c); -+} -+ -+/** -+ * ubifs_remount_rw - re-mount in read-write mode. -+ * @c: UBIFS file-system description object -+ * -+ * UBIFS avoids allocating many unnecessary resources when mounted in read-only -+ * mode. This function allocates the needed resources and re-mounts UBIFS in -+ * read-write mode. -+ */ -+static int ubifs_remount_rw(struct ubifs_info *c) -+{ -+ int err, lnum; -+ -+ mutex_lock(&c->umount_mutex); -+ dbg_save_space_info(c); -+ c->remounting_rw = 1; -+ c->always_chk_crc = 1; -+ -+ err = check_free_space(c); -+ if (err) -+ goto out; -+ -+ if (c->old_leb_cnt != c->leb_cnt) { -+ struct ubifs_sb_node *sup; -+ -+ sup = ubifs_read_sb_node(c); -+ if (IS_ERR(sup)) { -+ err = PTR_ERR(sup); -+ goto out; -+ } -+ sup->leb_cnt = cpu_to_le32(c->leb_cnt); -+ err = ubifs_write_sb_node(c, sup); -+ if (err) -+ goto out; -+ } -+ -+ if (c->need_recovery) { -+ ubifs_msg("completing deferred recovery"); -+ err = ubifs_write_rcvrd_mst_node(c); -+ if (err) -+ goto out; -+ err = ubifs_recover_size(c); -+ if (err) -+ goto out; -+ err = ubifs_clean_lebs(c, c->sbuf); -+ if (err) -+ goto out; -+ err = ubifs_recover_inl_heads(c, c->sbuf); -+ if (err) -+ goto out; -+ } else { -+ /* A readonly mount is not allowed to have orphans */ -+ ubifs_assert(c->tot_orphans == 0); -+ err = ubifs_clear_orphans(c); -+ if (err) -+ goto out; -+ } -+ -+ if (!(c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY))) { -+ 
c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY); -+ err = ubifs_write_master(c); -+ if (err) -+ goto out; -+ } -+ -+ c->ileb_buf = vmalloc(c->leb_size); -+ if (!c->ileb_buf) { -+ err = -ENOMEM; -+ goto out; -+ } -+ -+ err = ubifs_lpt_init(c, 0, 1); -+ if (err) -+ goto out; -+ -+ err = alloc_wbufs(c); -+ if (err) -+ goto out; -+ -+ ubifs_create_buds_lists(c); -+ -+ /* Create background thread */ -+ c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name); -+ if (IS_ERR(c->bgt)) { -+ err = PTR_ERR(c->bgt); -+ c->bgt = NULL; -+ ubifs_err("cannot spawn \"%s\", error %d", -+ c->bgt_name, err); -+ goto out; -+ } -+ wake_up_process(c->bgt); -+ -+ c->orph_buf = vmalloc(c->leb_size); -+ if (!c->orph_buf) { -+ err = -ENOMEM; -+ goto out; -+ } -+ -+ /* Check for enough log space */ -+ lnum = c->lhead_lnum + 1; -+ if (lnum >= UBIFS_LOG_LNUM + c->log_lebs) -+ lnum = UBIFS_LOG_LNUM; -+ if (lnum == c->ltail_lnum) { -+ err = ubifs_consolidate_log(c); -+ if (err) -+ goto out; -+ } -+ -+ if (c->need_recovery) -+ err = ubifs_rcvry_gc_commit(c); -+ else -+ err = ubifs_leb_unmap(c, c->gc_lnum); -+ if (err) -+ goto out; -+ -+ if (c->need_recovery) { -+ c->need_recovery = 0; -+ ubifs_msg("deferred recovery completed"); -+ } -+ -+ dbg_gen("re-mounted read-write"); -+ c->vfs_sb->s_flags &= ~MS_RDONLY; -+ c->remounting_rw = 0; -+ c->always_chk_crc = 0; -+ err = dbg_check_space_info(c); -+ mutex_unlock(&c->umount_mutex); -+ return err; -+ -+out: -+ vfree(c->orph_buf); -+ c->orph_buf = NULL; -+ if (c->bgt) { -+ kthread_stop(c->bgt); -+ c->bgt = NULL; -+ } -+ free_wbufs(c); -+ vfree(c->ileb_buf); -+ c->ileb_buf = NULL; -+ ubifs_lpt_free(c, 1); -+ c->remounting_rw = 0; -+ c->always_chk_crc = 0; -+ mutex_unlock(&c->umount_mutex); -+ return err; -+} -+ -+/** -+ * ubifs_remount_ro - re-mount in read-only mode. -+ * @c: UBIFS file-system description object -+ * -+ * We assume VFS has stopped writing. 
Possibly the background thread could be -+ * running a commit, however kthread_stop will wait in that case. -+ */ -+static void ubifs_remount_ro(struct ubifs_info *c) -+{ -+ int i, err; -+ -+ ubifs_assert(!c->need_recovery); -+ ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY)); -+ -+ mutex_lock(&c->umount_mutex); -+ if (c->bgt) { -+ kthread_stop(c->bgt); -+ c->bgt = NULL; -+ } -+ -+ dbg_save_space_info(c); -+ -+ for (i = 0; i < c->jhead_cnt; i++) { -+ ubifs_wbuf_sync(&c->jheads[i].wbuf); -+ del_timer_sync(&c->jheads[i].wbuf.timer); -+ } -+ -+ c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY); -+ c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS); -+ c->mst_node->gc_lnum = cpu_to_le32(c->gc_lnum); -+ err = ubifs_write_master(c); -+ if (err) -+ ubifs_ro_mode(c, err); -+ -+ free_wbufs(c); -+ vfree(c->orph_buf); -+ c->orph_buf = NULL; -+ vfree(c->ileb_buf); -+ c->ileb_buf = NULL; -+ ubifs_lpt_free(c, 1); -+ err = dbg_check_space_info(c); -+ if (err) -+ ubifs_ro_mode(c, err); -+ mutex_unlock(&c->umount_mutex); -+} -+ -+static void ubifs_put_super(struct super_block *sb) -+{ -+ int i; -+ struct ubifs_info *c = sb->s_fs_info; -+ -+ ubifs_msg("un-mount UBI device %d, volume %d", c->vi.ubi_num, -+ c->vi.vol_id); -+ /* -+ * The following asserts are only valid if there has not been a failure -+ * of the media. For example, there will be dirty inodes if we failed -+ * to write them back because of I/O errors. -+ */ -+ ubifs_assert(atomic_long_read(&c->dirty_pg_cnt) == 0); -+ ubifs_assert(c->budg_idx_growth == 0); -+ ubifs_assert(c->budg_dd_growth == 0); -+ ubifs_assert(c->budg_data_growth == 0); -+ -+ /* -+ * The 'c->umount_lock' prevents races between UBIFS memory shrinker -+ * and file system un-mount. Namely, it prevents the shrinker from -+ * picking this superblock for shrinking - it will be just skipped if -+ * the mutex is locked. 
-+ */ -+ mutex_lock(&c->umount_mutex); -+ if (!(c->vfs_sb->s_flags & MS_RDONLY)) { -+ /* -+ * First of all kill the background thread to make sure it does -+ * not interfere with un-mounting and freeing resources. -+ */ -+ if (c->bgt) { -+ kthread_stop(c->bgt); -+ c->bgt = NULL; -+ } -+ -+ /* Synchronize write-buffers */ -+ if (c->jheads) -+ for (i = 0; i < c->jhead_cnt; i++) { -+ ubifs_wbuf_sync(&c->jheads[i].wbuf); -+ del_timer_sync(&c->jheads[i].wbuf.timer); -+ } -+ -+ /* -+ * On fatal errors c->ro_media is set to 1, in which case we do -+ * not write the master node. -+ */ -+ if (!c->ro_media) { -+ /* -+ * We are being cleanly unmounted which means the -+ * orphans were killed - indicate this in the master -+ * node. Also save the reserved GC LEB number. -+ */ -+ int err; -+ -+ c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY); -+ c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS); -+ c->mst_node->gc_lnum = cpu_to_le32(c->gc_lnum); -+ err = ubifs_write_master(c); -+ if (err) -+ /* -+ * Recovery will attempt to fix the master area -+ * next mount, so we just print a message and -+ * continue to unmount normally. 
-+ */ -+ ubifs_err("failed to write master node, " -+ "error %d", err); -+ } -+ } -+ -+ ubifs_umount(c); -+ bdi_destroy(&c->bdi); -+ ubi_close_volume(c->ubi); -+ mutex_unlock(&c->umount_mutex); -+ kfree(c); -+} -+ -+static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) -+{ -+ int err; -+ struct ubifs_info *c = sb->s_fs_info; -+ -+ dbg_gen("old flags %#lx, new flags %#x", sb->s_flags, *flags); -+ -+ err = ubifs_parse_options(c, data, 1); -+ if (err) { -+ ubifs_err("invalid or unknown remount parameter"); -+ return err; -+ } -+ -+ if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) { -+ if (c->ro_media) { -+ ubifs_msg("cannot re-mount due to prior errors"); -+ return -EROFS; -+ } -+ err = ubifs_remount_rw(c); -+ if (err) -+ return err; -+ } else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) { -+ if (c->ro_media) { -+ ubifs_msg("cannot re-mount due to prior errors"); -+ return -EROFS; -+ } -+ ubifs_remount_ro(c); -+ } -+ -+ if (c->bulk_read == 1) -+ bu_init(c); -+ else { -+ dbg_gen("disable bulk-read"); -+ kfree(c->bu.buf); -+ c->bu.buf = NULL; -+ } -+ -+ ubifs_assert(c->lst.taken_empty_lebs == 1); -+ return 0; -+} -+ -+const struct super_operations ubifs_super_operations = { -+ .read_inode = ubifs_read_inode, -+ .alloc_inode = ubifs_alloc_inode, -+ .destroy_inode = ubifs_destroy_inode, -+ .put_super = ubifs_put_super, -+ .write_inode = ubifs_write_inode, -+ .delete_inode = ubifs_delete_inode, -+ .statfs = ubifs_statfs, -+ .dirty_inode = ubifs_dirty_inode, -+ .remount_fs = ubifs_remount_fs, -+ .show_options = ubifs_show_options, -+ .sync_fs = ubifs_sync_fs, -+}; -+ -+/** -+ * open_ubi - parse UBI device name string and open the UBI device. 
-+ * @name: UBI volume name -+ * @mode: UBI volume open mode -+ * -+ * There are several ways to specify UBI volumes when mounting UBIFS: -+ * o ubiX_Y - UBI device number X, volume Y; -+ * o ubiY - UBI device number 0, volume Y; -+ * o ubiX:NAME - mount UBI device X, volume with name NAME; -+ * o ubi:NAME - mount UBI device 0, volume with name NAME. -+ * -+ * Alternative '!' separator may be used instead of ':' (because some shells -+ * like busybox may interpret ':' as an NFS host name separator). This function -+ * returns ubi volume object in case of success and a negative error code in -+ * case of failure. -+ */ -+static struct ubi_volume_desc *open_ubi(const char *name, int mode) -+{ -+ int dev, vol; -+ char *endptr; -+ -+ if (name[0] != 'u' || name[1] != 'b' || name[2] != 'i') -+ return ERR_PTR(-EINVAL); -+ -+ /* ubi:NAME method */ -+ if ((name[3] == ':' || name[3] == '!') && name[4] != '\0') -+ return ubi_open_volume_nm(0, name + 4, mode); -+ -+ if (!isdigit(name[3])) -+ return ERR_PTR(-EINVAL); -+ -+ dev = simple_strtoul(name + 3, &endptr, 0); -+ -+ /* ubiY method */ -+ if (*endptr == '\0') -+ return ubi_open_volume(0, dev, mode); -+ -+ /* ubiX_Y method */ -+ if (*endptr == '_' && isdigit(endptr[1])) { -+ vol = simple_strtoul(endptr + 1, &endptr, 0); -+ if (*endptr != '\0') -+ return ERR_PTR(-EINVAL); -+ return ubi_open_volume(dev, vol, mode); -+ } -+ -+ /* ubiX:NAME method */ -+ if ((*endptr == ':' || *endptr == '!') && endptr[1] != '\0') -+ return ubi_open_volume_nm(dev, ++endptr, mode); -+ -+ return ERR_PTR(-EINVAL); -+} -+ -+static int ubifs_fill_super(struct super_block *sb, void *data, int silent) -+{ -+ struct ubi_volume_desc *ubi = sb->s_fs_info; -+ struct ubifs_info *c; -+ struct inode *root; -+ int err; -+ -+ c = kzalloc(sizeof(struct ubifs_info), GFP_KERNEL); -+ if (!c) -+ return -ENOMEM; -+ -+ spin_lock_init(&c->cnt_lock); -+ spin_lock_init(&c->cs_lock); -+ spin_lock_init(&c->buds_lock); -+ spin_lock_init(&c->space_lock); -+ 
spin_lock_init(&c->orphan_lock); -+ init_rwsem(&c->commit_sem); -+ mutex_init(&c->lp_mutex); -+ mutex_init(&c->tnc_mutex); -+ mutex_init(&c->log_mutex); -+ mutex_init(&c->mst_mutex); -+ mutex_init(&c->umount_mutex); -+ mutex_init(&c->bu_mutex); -+ init_waitqueue_head(&c->cmt_wq); -+ c->buds = RB_ROOT; -+ c->old_idx = RB_ROOT; -+ c->size_tree = RB_ROOT; -+ c->orph_tree = RB_ROOT; -+ INIT_LIST_HEAD(&c->infos_list); -+ INIT_LIST_HEAD(&c->idx_gc); -+ INIT_LIST_HEAD(&c->replay_list); -+ INIT_LIST_HEAD(&c->replay_buds); -+ INIT_LIST_HEAD(&c->uncat_list); -+ INIT_LIST_HEAD(&c->empty_list); -+ INIT_LIST_HEAD(&c->freeable_list); -+ INIT_LIST_HEAD(&c->frdi_idx_list); -+ INIT_LIST_HEAD(&c->unclean_leb_list); -+ INIT_LIST_HEAD(&c->old_buds); -+ INIT_LIST_HEAD(&c->orph_list); -+ INIT_LIST_HEAD(&c->orph_new); -+ -+ c->highest_inum = UBIFS_FIRST_INO; -+ c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM; -+ -+ ubi_get_volume_info(ubi, &c->vi); -+ ubi_get_device_info(c->vi.ubi_num, &c->di); -+ -+ /* Re-open the UBI device in read-write mode */ -+ c->ubi = ubi_open_volume(c->vi.ubi_num, c->vi.vol_id, UBI_READWRITE); -+ if (IS_ERR(c->ubi)) { -+ err = PTR_ERR(c->ubi); -+ goto out_free; -+ } -+ -+ /* -+ * UBIFS provides 'backing_dev_info' in order to disable read-ahead. For -+ * UBIFS, I/O is not deferred, it is done immediately in readpage, -+ * which means the user would have to wait not just for their own I/O -+ * but the read-ahead I/O as well i.e. completely pointless. -+ * -+ * Read-ahead will be disabled because @c->bdi.ra_pages is 0. 
-+ */ -+ c->bdi.capabilities = BDI_CAP_MAP_COPY; -+ c->bdi.unplug_io_fn = default_unplug_io_fn; -+ err = bdi_init(&c->bdi); -+ if (err) -+ goto out_close; -+ -+ err = ubifs_parse_options(c, data, 0); -+ if (err) -+ goto out_bdi; -+ -+ c->vfs_sb = sb; -+ -+ sb->s_fs_info = c; -+ sb->s_magic = UBIFS_SUPER_MAGIC; -+ sb->s_blocksize = UBIFS_BLOCK_SIZE; -+ sb->s_blocksize_bits = UBIFS_BLOCK_SHIFT; -+ sb->s_dev = c->vi.cdev; -+ sb->s_maxbytes = c->max_inode_sz = key_max_inode_size(c); -+ if (c->max_inode_sz > MAX_LFS_FILESIZE) -+ sb->s_maxbytes = c->max_inode_sz = MAX_LFS_FILESIZE; -+ sb->s_op = &ubifs_super_operations; -+ -+ mutex_lock(&c->umount_mutex); -+ err = mount_ubifs(c); -+ if (err) { -+ ubifs_assert(err < 0); -+ goto out_unlock; -+ } -+ -+ /* Read the root inode */ -+ root = ubifs_iget(sb, UBIFS_ROOT_INO); -+ if (IS_ERR(root)) { -+ err = PTR_ERR(root); -+ goto out_umount; -+ } -+ -+ sb->s_root = d_alloc_root(root); -+ if (!sb->s_root) -+ goto out_iput; -+ -+ mutex_unlock(&c->umount_mutex); -+ return 0; -+ -+out_iput: -+ iput(root); -+out_umount: -+ ubifs_umount(c); -+out_unlock: -+ mutex_unlock(&c->umount_mutex); -+out_bdi: -+ bdi_destroy(&c->bdi); -+out_close: -+ ubi_close_volume(c->ubi); -+out_free: -+ kfree(c); -+ return err; -+} -+ -+static int sb_test(struct super_block *sb, void *data) -+{ -+ dev_t *dev = data; -+ -+ return sb->s_dev == *dev; -+} -+ -+static int sb_set(struct super_block *sb, void *data) -+{ -+ dev_t *dev = data; -+ -+ sb->s_dev = *dev; -+ return 0; -+} -+ -+static int ubifs_get_sb(struct file_system_type *fs_type, int flags, -+ const char *name, void *data, struct vfsmount *mnt) -+{ -+ struct ubi_volume_desc *ubi; -+ struct ubi_volume_info vi; -+ struct super_block *sb; -+ int err; -+ -+ dbg_gen("name %s, flags %#x", name, flags); -+ -+ /* -+ * Get UBI device number and volume ID. Mount it read-only so far -+ * because this might be a new mount point, and UBI allows only one -+ * read-write user at a time. 
-+ */ -+ ubi = open_ubi(name, UBI_READONLY); -+ if (IS_ERR(ubi)) { -+ ubifs_err("cannot open \"%s\", error %d", -+ name, (int)PTR_ERR(ubi)); -+ return PTR_ERR(ubi); -+ } -+ ubi_get_volume_info(ubi, &vi); -+ -+ dbg_gen("opened ubi%d_%d", vi.ubi_num, vi.vol_id); -+ -+ sb = sget(fs_type, &sb_test, &sb_set, &vi.cdev); -+ if (IS_ERR(sb)) { -+ err = PTR_ERR(sb); -+ goto out_close; -+ } -+ -+ if (sb->s_root) { -+ /* A new mount point for already mounted UBIFS */ -+ dbg_gen("this ubi volume is already mounted"); -+ if ((flags ^ sb->s_flags) & MS_RDONLY) { -+ err = -EBUSY; -+ goto out_deact; -+ } -+ } else { -+ sb->s_flags = flags; -+ /* -+ * Pass 'ubi' to 'fill_super()' in sb->s_fs_info where it is -+ * replaced by 'c'. -+ */ -+ sb->s_fs_info = ubi; -+ err = ubifs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0); -+ if (err) -+ goto out_deact; -+ /* We do not support atime */ -+ sb->s_flags |= MS_ACTIVE | MS_NOATIME; -+ } -+ -+ /* 'fill_super()' opens ubi again so we must close it here */ -+ ubi_close_volume(ubi); -+ -+ return simple_set_mnt(mnt, sb); -+ -+out_deact: -+ up_write(&sb->s_umount); -+ deactivate_super(sb); -+out_close: -+ ubi_close_volume(ubi); -+ return err; -+} -+ -+static void ubifs_kill_sb(struct super_block *sb) -+{ -+ generic_shutdown_super(sb); -+} -+ -+static struct file_system_type ubifs_fs_type = { -+ .name = "ubifs", -+ .owner = THIS_MODULE, -+ .get_sb = ubifs_get_sb, -+ .kill_sb = ubifs_kill_sb, -+ .fs_flags = FS_REQUIRES_DEV, -+}; -+ -+/* -+ * Inode slab cache constructor. 
-+ */ -+static void inode_slab_ctor(struct kmem_cache *cachep, void *obj) -+{ -+ struct ubifs_inode *ui = obj; -+ inode_init_once(&ui->vfs_inode); -+} -+ -+static int __init ubifs_init(void) -+{ -+ int err; -+ -+ BUILD_BUG_ON(sizeof(struct ubifs_ch) != 24); -+ -+ /* Make sure node sizes are 8-byte aligned */ -+ BUILD_BUG_ON(UBIFS_CH_SZ & 7); -+ BUILD_BUG_ON(UBIFS_INO_NODE_SZ & 7); -+ BUILD_BUG_ON(UBIFS_DENT_NODE_SZ & 7); -+ BUILD_BUG_ON(UBIFS_XENT_NODE_SZ & 7); -+ BUILD_BUG_ON(UBIFS_DATA_NODE_SZ & 7); -+ BUILD_BUG_ON(UBIFS_TRUN_NODE_SZ & 7); -+ BUILD_BUG_ON(UBIFS_SB_NODE_SZ & 7); -+ BUILD_BUG_ON(UBIFS_MST_NODE_SZ & 7); -+ BUILD_BUG_ON(UBIFS_REF_NODE_SZ & 7); -+ BUILD_BUG_ON(UBIFS_CS_NODE_SZ & 7); -+ BUILD_BUG_ON(UBIFS_ORPH_NODE_SZ & 7); -+ -+ BUILD_BUG_ON(UBIFS_MAX_DENT_NODE_SZ & 7); -+ BUILD_BUG_ON(UBIFS_MAX_XENT_NODE_SZ & 7); -+ BUILD_BUG_ON(UBIFS_MAX_DATA_NODE_SZ & 7); -+ BUILD_BUG_ON(UBIFS_MAX_INO_NODE_SZ & 7); -+ BUILD_BUG_ON(UBIFS_MAX_NODE_SZ & 7); -+ BUILD_BUG_ON(MIN_WRITE_SZ & 7); -+ -+ /* Check min. node size */ -+ BUILD_BUG_ON(UBIFS_INO_NODE_SZ < MIN_WRITE_SZ); -+ BUILD_BUG_ON(UBIFS_DENT_NODE_SZ < MIN_WRITE_SZ); -+ BUILD_BUG_ON(UBIFS_XENT_NODE_SZ < MIN_WRITE_SZ); -+ BUILD_BUG_ON(UBIFS_TRUN_NODE_SZ < MIN_WRITE_SZ); -+ -+ BUILD_BUG_ON(UBIFS_MAX_DENT_NODE_SZ > UBIFS_MAX_NODE_SZ); -+ BUILD_BUG_ON(UBIFS_MAX_XENT_NODE_SZ > UBIFS_MAX_NODE_SZ); -+ BUILD_BUG_ON(UBIFS_MAX_DATA_NODE_SZ > UBIFS_MAX_NODE_SZ); -+ BUILD_BUG_ON(UBIFS_MAX_INO_NODE_SZ > UBIFS_MAX_NODE_SZ); -+ -+ /* Defined node sizes */ -+ BUILD_BUG_ON(UBIFS_SB_NODE_SZ != 4096); -+ BUILD_BUG_ON(UBIFS_MST_NODE_SZ != 512); -+ BUILD_BUG_ON(UBIFS_INO_NODE_SZ != 160); -+ BUILD_BUG_ON(UBIFS_REF_NODE_SZ != 64); -+ -+ /* -+ * We use 2 bit wide bit-fields to store compression type, which should -+ * be amended if more compressors are added. The bit-fields are: -+ * @compr_type in 'struct ubifs_inode', @default_compr in -+ * 'struct ubifs_info' and @compr_type in 'struct ubifs_mount_opts'. 
-+ */ -+ BUILD_BUG_ON(UBIFS_COMPR_TYPES_CNT > 4); -+ -+ /* -+ * We require that PAGE_CACHE_SIZE is greater-than-or-equal-to -+ * UBIFS_BLOCK_SIZE. It is assumed that both are powers of 2. -+ */ -+ if (PAGE_CACHE_SIZE < UBIFS_BLOCK_SIZE) { -+ ubifs_err("VFS page cache size is %u bytes, but UBIFS requires" -+ " at least 4096 bytes", -+ (unsigned int)PAGE_CACHE_SIZE); -+ return -EINVAL; -+ } -+ -+ err = register_filesystem(&ubifs_fs_type); -+ if (err) { -+ ubifs_err("cannot register file system, error %d", err); -+ return err; -+ } -+ -+ err = -ENOMEM; -+ ubifs_inode_slab = kmem_cache_create("ubifs_inode_slab", -+ sizeof(struct ubifs_inode), 0, -+ SLAB_MEM_SPREAD | SLAB_RECLAIM_ACCOUNT, -+ &inode_slab_ctor); -+ if (!ubifs_inode_slab) -+ goto out_reg; -+ -+ register_shrinker(&ubifs_shrinker_info); -+ -+ err = ubifs_compressors_init(); -+ if (err) -+ goto out_shrinker; -+ -+ err = dbg_debugfs_init(); -+ if (err) -+ goto out_compr; -+ -+ return 0; -+ -+out_compr: -+ ubifs_compressors_exit(); -+out_shrinker: -+ unregister_shrinker(&ubifs_shrinker_info); -+ kmem_cache_destroy(ubifs_inode_slab); -+out_reg: -+ unregister_filesystem(&ubifs_fs_type); -+ return err; -+} -+/* late_initcall to let compressors initialize first */ -+late_initcall(ubifs_init); -+ -+static void __exit ubifs_exit(void) -+{ -+ ubifs_assert(list_empty(&ubifs_infos)); -+ ubifs_assert(atomic_long_read(&ubifs_clean_zn_cnt) == 0); -+ -+ dbg_debugfs_exit(); -+ ubifs_compressors_exit(); -+ unregister_shrinker(&ubifs_shrinker_info); -+ kmem_cache_destroy(ubifs_inode_slab); -+ unregister_filesystem(&ubifs_fs_type); -+} -+module_exit(ubifs_exit); -+ -+MODULE_LICENSE("GPL"); -+MODULE_VERSION(__stringify(UBIFS_VERSION)); -+MODULE_AUTHOR("Artem Bityutskiy, Adrian Hunter"); -+MODULE_DESCRIPTION("UBIFS - UBI File System"); -diff -Nurd linux-2.6.24.orig/fs/ubifs/tnc.c linux-2.6.24/fs/ubifs/tnc.c ---- linux-2.6.24.orig/fs/ubifs/tnc.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.24/fs/ubifs/tnc.c 2009-04-17 
09:49:28.000000000 +0200 -@@ -0,0 +1,3270 @@ -+/* -+ * This file is part of UBIFS. -+ * -+ * Copyright (C) 2006-2008 Nokia Corporation. -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 as published by -+ * the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -+ * more details. -+ * -+ * You should have received a copy of the GNU General Public License along with -+ * this program; if not, write to the Free Software Foundation, Inc., 51 -+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ * -+ * Authors: Adrian Hunter -+ * Artem Bityutskiy (Битюцкий Артём) -+ */ -+ -+/* -+ * This file implements TNC (Tree Node Cache) which caches indexing nodes of -+ * the UBIFS B-tree. -+ * -+ * At the moment the locking rules of the TNC tree are quite simple and -+ * straightforward. We just have a mutex and lock it when we traverse the -+ * tree. If a znode is not in memory, we read it from flash while still having -+ * the mutex locked. -+ */ -+ -+#include <linux/crc32.h> -+#include "ubifs.h" -+ -+/* -+ * Returned codes of 'matches_name()' and 'fallible_matches_name()' functions. -+ * @NAME_LESS: name corresponding to the first argument is less than second -+ * @NAME_MATCHES: names match -+ * @NAME_GREATER: name corresponding to the second argument is greater than -+ * first -+ * @NOT_ON_MEDIA: node referred by zbranch does not exist on the media -+ * -+ * These constants were introduce to improve readability. -+ */ -+enum { -+ NAME_LESS = 0, -+ NAME_MATCHES = 1, -+ NAME_GREATER = 2, -+ NOT_ON_MEDIA = 3, -+}; -+ -+/** -+ * insert_old_idx - record an index node obsoleted since the last commit start. 
-+ * @c: UBIFS file-system description object -+ * @lnum: LEB number of obsoleted index node -+ * @offs: offset of obsoleted index node -+ * -+ * Returns %0 on success, and a negative error code on failure. -+ * -+ * For recovery, there must always be a complete intact version of the index on -+ * flash at all times. That is called the "old index". It is the index as at the -+ * time of the last successful commit. Many of the index nodes in the old index -+ * may be dirty, but they must not be erased until the next successful commit -+ * (at which point that index becomes the old index). -+ * -+ * That means that the garbage collection and the in-the-gaps method of -+ * committing must be able to determine if an index node is in the old index. -+ * Most of the old index nodes can be found by looking up the TNC using the -+ * 'lookup_znode()' function. However, some of the old index nodes may have -+ * been deleted from the current index or may have been changed so much that -+ * they cannot be easily found. In those cases, an entry is added to an RB-tree. -+ * That is what this function does. The RB-tree is ordered by LEB number and -+ * offset because they uniquely identify the old index node. 
-+ */ -+static int insert_old_idx(struct ubifs_info *c, int lnum, int offs) -+{ -+ struct ubifs_old_idx *old_idx, *o; -+ struct rb_node **p, *parent = NULL; -+ -+ old_idx = kmalloc(sizeof(struct ubifs_old_idx), GFP_NOFS); -+ if (unlikely(!old_idx)) -+ return -ENOMEM; -+ old_idx->lnum = lnum; -+ old_idx->offs = offs; -+ -+ p = &c->old_idx.rb_node; -+ while (*p) { -+ parent = *p; -+ o = rb_entry(parent, struct ubifs_old_idx, rb); -+ if (lnum < o->lnum) -+ p = &(*p)->rb_left; -+ else if (lnum > o->lnum) -+ p = &(*p)->rb_right; -+ else if (offs < o->offs) -+ p = &(*p)->rb_left; -+ else if (offs > o->offs) -+ p = &(*p)->rb_right; -+ else { -+ ubifs_err("old idx added twice!"); -+ kfree(old_idx); -+ return 0; -+ } -+ } -+ rb_link_node(&old_idx->rb, parent, p); -+ rb_insert_color(&old_idx->rb, &c->old_idx); -+ return 0; -+} -+ -+/** -+ * insert_old_idx_znode - record a znode obsoleted since last commit start. -+ * @c: UBIFS file-system description object -+ * @znode: znode of obsoleted index node -+ * -+ * Returns %0 on success, and a negative error code on failure. -+ */ -+int insert_old_idx_znode(struct ubifs_info *c, struct ubifs_znode *znode) -+{ -+ if (znode->parent) { -+ struct ubifs_zbranch *zbr; -+ -+ zbr = &znode->parent->zbranch[znode->iip]; -+ if (zbr->len) -+ return insert_old_idx(c, zbr->lnum, zbr->offs); -+ } else -+ if (c->zroot.len) -+ return insert_old_idx(c, c->zroot.lnum, -+ c->zroot.offs); -+ return 0; -+} -+ -+/** -+ * ins_clr_old_idx_znode - record a znode obsoleted since last commit start. -+ * @c: UBIFS file-system description object -+ * @znode: znode of obsoleted index node -+ * -+ * Returns %0 on success, and a negative error code on failure. 
-+ */ -+static int ins_clr_old_idx_znode(struct ubifs_info *c, -+ struct ubifs_znode *znode) -+{ -+ int err; -+ -+ if (znode->parent) { -+ struct ubifs_zbranch *zbr; -+ -+ zbr = &znode->parent->zbranch[znode->iip]; -+ if (zbr->len) { -+ err = insert_old_idx(c, zbr->lnum, zbr->offs); -+ if (err) -+ return err; -+ zbr->lnum = 0; -+ zbr->offs = 0; -+ zbr->len = 0; -+ } -+ } else -+ if (c->zroot.len) { -+ err = insert_old_idx(c, c->zroot.lnum, c->zroot.offs); -+ if (err) -+ return err; -+ c->zroot.lnum = 0; -+ c->zroot.offs = 0; -+ c->zroot.len = 0; -+ } -+ return 0; -+} -+ -+/** -+ * destroy_old_idx - destroy the old_idx RB-tree. -+ * @c: UBIFS file-system description object -+ * -+ * During start commit, the old_idx RB-tree is used to avoid overwriting index -+ * nodes that were in the index last commit but have since been deleted. This -+ * is necessary for recovery i.e. the old index must be kept intact until the -+ * new index is successfully written. The old-idx RB-tree is used for the -+ * in-the-gaps method of writing index nodes and is destroyed every commit. -+ */ -+void destroy_old_idx(struct ubifs_info *c) -+{ -+ struct rb_node *this = c->old_idx.rb_node; -+ struct ubifs_old_idx *old_idx; -+ -+ while (this) { -+ if (this->rb_left) { -+ this = this->rb_left; -+ continue; -+ } else if (this->rb_right) { -+ this = this->rb_right; -+ continue; -+ } -+ old_idx = rb_entry(this, struct ubifs_old_idx, rb); -+ this = rb_parent(this); -+ if (this) { -+ if (this->rb_left == &old_idx->rb) -+ this->rb_left = NULL; -+ else -+ this->rb_right = NULL; -+ } -+ kfree(old_idx); -+ } -+ c->old_idx = RB_ROOT; -+} -+ -+/** -+ * copy_znode - copy a dirty znode. -+ * @c: UBIFS file-system description object -+ * @znode: znode to copy -+ * -+ * A dirty znode being committed may not be changed, so it is copied. 
-+ */ -+static struct ubifs_znode *copy_znode(struct ubifs_info *c, -+ struct ubifs_znode *znode) -+{ -+ struct ubifs_znode *zn; -+ -+ zn = kmalloc(c->max_znode_sz, GFP_NOFS); -+ if (unlikely(!zn)) -+ return ERR_PTR(-ENOMEM); -+ -+ memcpy(zn, znode, c->max_znode_sz); -+ zn->cnext = NULL; -+ __set_bit(DIRTY_ZNODE, &zn->flags); -+ __clear_bit(COW_ZNODE, &zn->flags); -+ -+ ubifs_assert(!test_bit(OBSOLETE_ZNODE, &znode->flags)); -+ __set_bit(OBSOLETE_ZNODE, &znode->flags); -+ -+ if (znode->level != 0) { -+ int i; -+ const int n = zn->child_cnt; -+ -+ /* The children now have new parent */ -+ for (i = 0; i < n; i++) { -+ struct ubifs_zbranch *zbr = &zn->zbranch[i]; -+ -+ if (zbr->znode) -+ zbr->znode->parent = zn; -+ } -+ } -+ -+ atomic_long_inc(&c->dirty_zn_cnt); -+ return zn; -+} -+ -+/** -+ * add_idx_dirt - add dirt due to a dirty znode. -+ * @c: UBIFS file-system description object -+ * @lnum: LEB number of index node -+ * @dirt: size of index node -+ * -+ * This function updates lprops dirty space and the new size of the index. -+ */ -+static int add_idx_dirt(struct ubifs_info *c, int lnum, int dirt) -+{ -+ c->calc_idx_sz -= ALIGN(dirt, 8); -+ return ubifs_add_dirt(c, lnum, dirt); -+} -+ -+/** -+ * dirty_cow_znode - ensure a znode is not being committed. -+ * @c: UBIFS file-system description object -+ * @zbr: branch of znode to check -+ * -+ * Returns dirtied znode on success or negative error code on failure. 
-+ */ -+static struct ubifs_znode *dirty_cow_znode(struct ubifs_info *c, -+ struct ubifs_zbranch *zbr) -+{ -+ struct ubifs_znode *znode = zbr->znode; -+ struct ubifs_znode *zn; -+ int err; -+ -+ if (!test_bit(COW_ZNODE, &znode->flags)) { -+ /* znode is not being committed */ -+ if (!test_and_set_bit(DIRTY_ZNODE, &znode->flags)) { -+ atomic_long_inc(&c->dirty_zn_cnt); -+ atomic_long_dec(&c->clean_zn_cnt); -+ atomic_long_dec(&ubifs_clean_zn_cnt); -+ err = add_idx_dirt(c, zbr->lnum, zbr->len); -+ if (unlikely(err)) -+ return ERR_PTR(err); -+ } -+ return znode; -+ } -+ -+ zn = copy_znode(c, znode); -+ if (IS_ERR(zn)) -+ return zn; -+ -+ if (zbr->len) { -+ err = insert_old_idx(c, zbr->lnum, zbr->offs); -+ if (unlikely(err)) -+ return ERR_PTR(err); -+ err = add_idx_dirt(c, zbr->lnum, zbr->len); -+ } else -+ err = 0; -+ -+ zbr->znode = zn; -+ zbr->lnum = 0; -+ zbr->offs = 0; -+ zbr->len = 0; -+ -+ if (unlikely(err)) -+ return ERR_PTR(err); -+ return zn; -+} -+ -+/** -+ * lnc_add - add a leaf node to the leaf node cache. -+ * @c: UBIFS file-system description object -+ * @zbr: zbranch of leaf node -+ * @node: leaf node -+ * -+ * Leaf nodes are non-index nodes directory entry nodes or data nodes. The -+ * purpose of the leaf node cache is to save re-reading the same leaf node over -+ * and over again. Most things are cached by VFS, however the file system must -+ * cache directory entries for readdir and for resolving hash collisions. The -+ * present implementation of the leaf node cache is extremely simple, and -+ * allows for error returns that are not used but that may be needed if a more -+ * complex implementation is created. -+ * -+ * Note, this function does not add the @node object to LNC directly, but -+ * allocates a copy of the object and adds the copy to LNC. The reason for this -+ * is that @node has been allocated outside of the TNC subsystem and will be -+ * used with @c->tnc_mutex unlock upon return from the TNC subsystem. 
But LNC -+ * may be changed at any time, e.g. freed by the shrinker. -+ */ -+static int lnc_add(struct ubifs_info *c, struct ubifs_zbranch *zbr, -+ const void *node) -+{ -+ int err; -+ void *lnc_node; -+ const struct ubifs_dent_node *dent = node; -+ -+ ubifs_assert(!zbr->leaf); -+ ubifs_assert(zbr->len != 0); -+ ubifs_assert(is_hash_key(c, &zbr->key)); -+ -+ err = ubifs_validate_entry(c, dent); -+ if (err) { -+ dbg_dump_stack(); -+ dbg_dump_node(c, dent); -+ return err; -+ } -+ -+ lnc_node = kmalloc(zbr->len, GFP_NOFS); -+ if (!lnc_node) -+ /* We don't have to have the cache, so no error */ -+ return 0; -+ -+ memcpy(lnc_node, node, zbr->len); -+ zbr->leaf = lnc_node; -+ return 0; -+} -+ -+ /** -+ * lnc_add_directly - add a leaf node to the leaf-node-cache. -+ * @c: UBIFS file-system description object -+ * @zbr: zbranch of leaf node -+ * @node: leaf node -+ * -+ * This function is similar to 'lnc_add()', but it does not create a copy of -+ * @node but inserts @node to TNC directly. -+ */ -+static int lnc_add_directly(struct ubifs_info *c, struct ubifs_zbranch *zbr, -+ void *node) -+{ -+ int err; -+ -+ ubifs_assert(!zbr->leaf); -+ ubifs_assert(zbr->len != 0); -+ -+ err = ubifs_validate_entry(c, node); -+ if (err) { -+ dbg_dump_stack(); -+ dbg_dump_node(c, node); -+ return err; -+ } -+ -+ zbr->leaf = node; -+ return 0; -+} -+ -+/** -+ * lnc_free - remove a leaf node from the leaf node cache. -+ * @zbr: zbranch of leaf node -+ * @node: leaf node -+ */ -+static void lnc_free(struct ubifs_zbranch *zbr) -+{ -+ if (!zbr->leaf) -+ return; -+ kfree(zbr->leaf); -+ zbr->leaf = NULL; -+} -+ -+/** -+ * tnc_read_node_nm - read a "hashed" leaf node. -+ * @c: UBIFS file-system description object -+ * @zbr: key and position of the node -+ * @node: node is returned here -+ * -+ * This function reads a "hashed" node defined by @zbr from the leaf node cache -+ * (in it is there) or from the hash media, in which case the node is also -+ * added to LNC. 
Returns zero in case of success or a negative negative error -+ * code in case of failure. -+ */ -+static int tnc_read_node_nm(struct ubifs_info *c, struct ubifs_zbranch *zbr, -+ void *node) -+{ -+ int err; -+ -+ ubifs_assert(is_hash_key(c, &zbr->key)); -+ -+ if (zbr->leaf) { -+ /* Read from the leaf node cache */ -+ ubifs_assert(zbr->len != 0); -+ memcpy(node, zbr->leaf, zbr->len); -+ return 0; -+ } -+ -+ err = ubifs_tnc_read_node(c, zbr, node); -+ if (err) -+ return err; -+ -+ /* Add the node to the leaf node cache */ -+ err = lnc_add(c, zbr, node); -+ return err; -+} -+ -+/** -+ * try_read_node - read a node if it is a node. -+ * @c: UBIFS file-system description object -+ * @buf: buffer to read to -+ * @type: node type -+ * @len: node length (not aligned) -+ * @lnum: LEB number of node to read -+ * @offs: offset of node to read -+ * -+ * This function tries to read a node of known type and length, checks it and -+ * stores it in @buf. This function returns %1 if a node is present and %0 if -+ * a node is not present. A negative error code is returned for I/O errors. -+ * This function performs that same function as ubifs_read_node except that -+ * it does not require that there is actually a node present and instead -+ * the return code indicates if a node was read. -+ * -+ * Note, this function does not check CRC of data nodes if @c->no_chk_data_crc -+ * is true (it is controlled by corresponding mount option). However, if -+ * @c->always_chk_crc is true, @c->no_chk_data_crc is ignored and CRC is always -+ * checked. 
-+ */ -+static int try_read_node(const struct ubifs_info *c, void *buf, int type, -+ int len, int lnum, int offs) -+{ -+ int err, node_len; -+ struct ubifs_ch *ch = buf; -+ uint32_t crc, node_crc; -+ -+ dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len); -+ -+ err = ubi_read(c->ubi, lnum, buf, offs, len); -+ if (err) { -+ ubifs_err("cannot read node type %d from LEB %d:%d, error %d", -+ type, lnum, offs, err); -+ return err; -+ } -+ -+ if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC) -+ return 0; -+ -+ if (ch->node_type != type) -+ return 0; -+ -+ node_len = le32_to_cpu(ch->len); -+ if (node_len != len) -+ return 0; -+ -+ if (type == UBIFS_DATA_NODE && !c->always_chk_crc && c->no_chk_data_crc) -+ return 1; -+ -+ crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); -+ node_crc = le32_to_cpu(ch->crc); -+ if (crc != node_crc) -+ return 0; -+ -+ return 1; -+} -+ -+/** -+ * fallible_read_node - try to read a leaf node. -+ * @c: UBIFS file-system description object -+ * @key: key of node to read -+ * @zbr: position of node -+ * @node: node returned -+ * -+ * This function tries to read a node and returns %1 if the node is read, %0 -+ * if the node is not present, and a negative error code in the case of error. 
-+ */ -+static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key, -+ struct ubifs_zbranch *zbr, void *node) -+{ -+ int ret; -+ -+ dbg_tnc("LEB %d:%d, key %s", zbr->lnum, zbr->offs, DBGKEY(key)); -+ -+ ret = try_read_node(c, node, key_type(c, key), zbr->len, zbr->lnum, -+ zbr->offs); -+ if (ret == 1) { -+ union ubifs_key node_key; -+ struct ubifs_dent_node *dent = node; -+ -+ /* All nodes have key in the same place */ -+ key_read(c, &dent->key, &node_key); -+ if (keys_cmp(c, key, &node_key) != 0) -+ ret = 0; -+ } -+ if (ret == 0 && c->replaying) -+ dbg_mnt("dangling branch LEB %d:%d len %d, key %s", -+ zbr->lnum, zbr->offs, zbr->len, DBGKEY(key)); -+ return ret; -+} -+ -+/** -+ * matches_name - determine if a direntry or xattr entry matches a given name. -+ * @c: UBIFS file-system description object -+ * @zbr: zbranch of dent -+ * @nm: name to match -+ * -+ * This function checks if xentry/direntry referred by zbranch @zbr matches name -+ * @nm. Returns %NAME_MATCHES if it does, %NAME_LESS if the name referred by -+ * @zbr is less than @nm, and %NAME_GREATER if it is greater than @nm. In case -+ * of failure, a negative error code is returned. 
-+ */ -+static int matches_name(struct ubifs_info *c, struct ubifs_zbranch *zbr, -+ const struct qstr *nm) -+{ -+ struct ubifs_dent_node *dent; -+ int nlen, err; -+ -+ /* If possible, match against the dent in the leaf node cache */ -+ if (!zbr->leaf) { -+ dent = kmalloc(zbr->len, GFP_NOFS); -+ if (!dent) -+ return -ENOMEM; -+ -+ err = ubifs_tnc_read_node(c, zbr, dent); -+ if (err) -+ goto out_free; -+ -+ /* Add the node to the leaf node cache */ -+ err = lnc_add_directly(c, zbr, dent); -+ if (err) -+ goto out_free; -+ } else -+ dent = zbr->leaf; -+ -+ nlen = le16_to_cpu(dent->nlen); -+ err = memcmp(dent->name, nm->name, min_t(int, nlen, nm->len)); -+ if (err == 0) { -+ if (nlen == nm->len) -+ return NAME_MATCHES; -+ else if (nlen < nm->len) -+ return NAME_LESS; -+ else -+ return NAME_GREATER; -+ } else if (err < 0) -+ return NAME_LESS; -+ else -+ return NAME_GREATER; -+ -+out_free: -+ kfree(dent); -+ return err; -+} -+ -+/** -+ * get_znode - get a TNC znode that may not be loaded yet. -+ * @c: UBIFS file-system description object -+ * @znode: parent znode -+ * @n: znode branch slot number -+ * -+ * This function returns the znode or a negative error code. -+ */ -+static struct ubifs_znode *get_znode(struct ubifs_info *c, -+ struct ubifs_znode *znode, int n) -+{ -+ struct ubifs_zbranch *zbr; -+ -+ zbr = &znode->zbranch[n]; -+ if (zbr->znode) -+ znode = zbr->znode; -+ else -+ znode = ubifs_load_znode(c, zbr, znode, n); -+ return znode; -+} -+ -+/** -+ * tnc_next - find next TNC entry. -+ * @c: UBIFS file-system description object -+ * @zn: znode is passed and returned here -+ * @n: znode branch slot number is passed and returned here -+ * -+ * This function returns %0 if the next TNC entry is found, %-ENOENT if there is -+ * no next entry, or a negative error code otherwise. 
-+ */ -+static int tnc_next(struct ubifs_info *c, struct ubifs_znode **zn, int *n) -+{ -+ struct ubifs_znode *znode = *zn; -+ int nn = *n; -+ -+ nn += 1; -+ if (nn < znode->child_cnt) { -+ *n = nn; -+ return 0; -+ } -+ while (1) { -+ struct ubifs_znode *zp; -+ -+ zp = znode->parent; -+ if (!zp) -+ return -ENOENT; -+ nn = znode->iip + 1; -+ znode = zp; -+ if (nn < znode->child_cnt) { -+ znode = get_znode(c, znode, nn); -+ if (IS_ERR(znode)) -+ return PTR_ERR(znode); -+ while (znode->level != 0) { -+ znode = get_znode(c, znode, 0); -+ if (IS_ERR(znode)) -+ return PTR_ERR(znode); -+ } -+ nn = 0; -+ break; -+ } -+ } -+ *zn = znode; -+ *n = nn; -+ return 0; -+} -+ -+/** -+ * tnc_prev - find previous TNC entry. -+ * @c: UBIFS file-system description object -+ * @zn: znode is returned here -+ * @n: znode branch slot number is passed and returned here -+ * -+ * This function returns %0 if the previous TNC entry is found, %-ENOENT if -+ * there is no next entry, or a negative error code otherwise. -+ */ -+static int tnc_prev(struct ubifs_info *c, struct ubifs_znode **zn, int *n) -+{ -+ struct ubifs_znode *znode = *zn; -+ int nn = *n; -+ -+ if (nn > 0) { -+ *n = nn - 1; -+ return 0; -+ } -+ while (1) { -+ struct ubifs_znode *zp; -+ -+ zp = znode->parent; -+ if (!zp) -+ return -ENOENT; -+ nn = znode->iip - 1; -+ znode = zp; -+ if (nn >= 0) { -+ znode = get_znode(c, znode, nn); -+ if (IS_ERR(znode)) -+ return PTR_ERR(znode); -+ while (znode->level != 0) { -+ nn = znode->child_cnt - 1; -+ znode = get_znode(c, znode, nn); -+ if (IS_ERR(znode)) -+ return PTR_ERR(znode); -+ } -+ nn = znode->child_cnt - 1; -+ break; -+ } -+ } -+ *zn = znode; -+ *n = nn; -+ return 0; -+} -+ -+/** -+ * resolve_collision - resolve a collision. 
-+ * @c: UBIFS file-system description object -+ * @key: key of a directory or extended attribute entry -+ * @zn: znode is returned here -+ * @n: zbranch number is passed and returned here -+ * @nm: name of the entry -+ * -+ * This function is called for "hashed" keys to make sure that the found key -+ * really corresponds to the looked up node (directory or extended attribute -+ * entry). It returns %1 and sets @zn and @n if the collision is resolved. -+ * %0 is returned if @nm is not found and @zn and @n are set to the previous -+ * entry, i.e. to the entry after which @nm could follow if it were in TNC. -+ * This means that @n may be set to %-1 if the leftmost key in @zn is the -+ * previous one. A negative error code is returned on failures. -+ */ -+static int resolve_collision(struct ubifs_info *c, const union ubifs_key *key, -+ struct ubifs_znode **zn, int *n, -+ const struct qstr *nm) -+{ -+ int err; -+ -+ err = matches_name(c, &(*zn)->zbranch[*n], nm); -+ if (unlikely(err < 0)) -+ return err; -+ if (err == NAME_MATCHES) -+ return 1; -+ -+ if (err == NAME_GREATER) { -+ /* Look left */ -+ while (1) { -+ err = tnc_prev(c, zn, n); -+ if (err == -ENOENT) { -+ ubifs_assert(*n == 0); -+ *n = -1; -+ return 0; -+ } -+ if (err < 0) -+ return err; -+ if (keys_cmp(c, &(*zn)->zbranch[*n].key, key)) { -+ /* -+ * We have found the branch after which we would -+ * like to insert, but inserting in this znode -+ * may still be wrong. Consider the following 3 -+ * znodes, in the case where we are resolving a -+ * collision with Key2. -+ * -+ * znode zp -+ * ---------------------- -+ * level 1 | Key0 | Key1 | -+ * ----------------------- -+ * | | -+ * znode za | | znode zb -+ * ------------ ------------ -+ * level 0 | Key0 | | Key2 | -+ * ------------ ------------ -+ * -+ * The lookup finds Key2 in znode zb. Lets say -+ * there is no match and the name is greater so -+ * we look left. When we find Key0, we end up -+ * here. 
If we return now, we will insert into -+ * znode za at slot n = 1. But that is invalid -+ * according to the parent's keys. Key2 must -+ * be inserted into znode zb. -+ * -+ * Note, this problem is not relevant for the -+ * case when we go right, because -+ * 'tnc_insert()' would correct the parent key. -+ */ -+ if (*n == (*zn)->child_cnt - 1) { -+ err = tnc_next(c, zn, n); -+ if (err) { -+ /* Should be impossible */ -+ ubifs_assert(0); -+ if (err == -ENOENT) -+ err = -EINVAL; -+ return err; -+ } -+ ubifs_assert(*n == 0); -+ *n = -1; -+ } -+ return 0; -+ } -+ err = matches_name(c, &(*zn)->zbranch[*n], nm); -+ if (err < 0) -+ return err; -+ if (err == NAME_LESS) -+ return 0; -+ if (err == NAME_MATCHES) -+ return 1; -+ ubifs_assert(err == NAME_GREATER); -+ } -+ } else { -+ int nn = *n; -+ struct ubifs_znode *znode = *zn; -+ -+ /* Look right */ -+ while (1) { -+ err = tnc_next(c, &znode, &nn); -+ if (err == -ENOENT) -+ return 0; -+ if (err < 0) -+ return err; -+ if (keys_cmp(c, &znode->zbranch[nn].key, key)) -+ return 0; -+ err = matches_name(c, &znode->zbranch[nn], nm); -+ if (err < 0) -+ return err; -+ if (err == NAME_GREATER) -+ return 0; -+ *zn = znode; -+ *n = nn; -+ if (err == NAME_MATCHES) -+ return 1; -+ ubifs_assert(err == NAME_LESS); -+ } -+ } -+} -+ -+/** -+ * fallible_matches_name - determine if a dent matches a given name. -+ * @c: UBIFS file-system description object -+ * @zbr: zbranch of dent -+ * @nm: name to match -+ * -+ * This is a "fallible" version of 'matches_name()' function which does not -+ * panic if the direntry/xentry referred by @zbr does not exist on the media. -+ * -+ * This function checks if xentry/direntry referred by zbranch @zbr matches name -+ * @nm. Returns %NAME_MATCHES it does, %NAME_LESS if the name referred by @zbr -+ * is less than @nm, %NAME_GREATER if it is greater than @nm, and @NOT_ON_MEDIA -+ * if xentry/direntry referred by @zbr does not exist on the media. A negative -+ * error code is returned in case of failure. 
-+ */ -+static int fallible_matches_name(struct ubifs_info *c, -+ struct ubifs_zbranch *zbr, -+ const struct qstr *nm) -+{ -+ struct ubifs_dent_node *dent; -+ int nlen, err; -+ -+ /* If possible, match against the dent in the leaf node cache */ -+ if (!zbr->leaf) { -+ dent = kmalloc(zbr->len, GFP_NOFS); -+ if (!dent) -+ return -ENOMEM; -+ -+ err = fallible_read_node(c, &zbr->key, zbr, dent); -+ if (err < 0) -+ goto out_free; -+ if (err == 0) { -+ /* The node was not present */ -+ err = NOT_ON_MEDIA; -+ goto out_free; -+ } -+ ubifs_assert(err == 1); -+ -+ err = lnc_add_directly(c, zbr, dent); -+ if (err) -+ goto out_free; -+ } else -+ dent = zbr->leaf; -+ -+ nlen = le16_to_cpu(dent->nlen); -+ err = memcmp(dent->name, nm->name, min_t(int, nlen, nm->len)); -+ if (err == 0) { -+ if (nlen == nm->len) -+ return NAME_MATCHES; -+ else if (nlen < nm->len) -+ return NAME_LESS; -+ else -+ return NAME_GREATER; -+ } else if (err < 0) -+ return NAME_LESS; -+ else -+ return NAME_GREATER; -+ -+out_free: -+ kfree(dent); -+ return err; -+} -+ -+/** -+ * fallible_resolve_collision - resolve a collision even if nodes are missing. -+ * @c: UBIFS file-system description object -+ * @key: key -+ * @zn: znode is returned here -+ * @n: branch number is passed and returned here -+ * @nm: name of directory entry -+ * @adding: indicates caller is adding a key to the TNC -+ * -+ * This is a "fallible" version of the 'resolve_collision()' function which -+ * does not panic if one of the nodes referred to by TNC does not exist on the -+ * media. This may happen when replaying the journal if a deleted node was -+ * Garbage-collected and the commit was not done. A branch that refers to a node -+ * that is not present is called a dangling branch. 
The following are the return -+ * codes for this function: -+ * o if @nm was found, %1 is returned and @zn and @n are set to the found -+ * branch; -+ * o if we are @adding and @nm was not found, %0 is returned; -+ * o if we are not @adding and @nm was not found, but a dangling branch was -+ * found, then %1 is returned and @zn and @n are set to the dangling branch; -+ * o a negative error code is returned in case of failure. -+ */ -+static int fallible_resolve_collision(struct ubifs_info *c, -+ const union ubifs_key *key, -+ struct ubifs_znode **zn, int *n, -+ const struct qstr *nm, int adding) -+{ -+ struct ubifs_znode *o_znode = NULL, *znode = *zn; -+ int uninitialized_var(o_n), err, cmp, unsure = 0, nn = *n; -+ -+ cmp = fallible_matches_name(c, &znode->zbranch[nn], nm); -+ if (unlikely(cmp < 0)) -+ return cmp; -+ if (cmp == NAME_MATCHES) -+ return 1; -+ if (cmp == NOT_ON_MEDIA) { -+ o_znode = znode; -+ o_n = nn; -+ /* -+ * We are unlucky and hit a dangling branch straight away. -+ * Now we do not really know where to go to find the needed -+ * branch - to the left or to the right. Well, let's try left. 
-+ */ -+ unsure = 1; -+ } else if (!adding) -+ unsure = 1; /* Remove a dangling branch wherever it is */ -+ -+ if (cmp == NAME_GREATER || unsure) { -+ /* Look left */ -+ while (1) { -+ err = tnc_prev(c, zn, n); -+ if (err == -ENOENT) { -+ ubifs_assert(*n == 0); -+ *n = -1; -+ break; -+ } -+ if (err < 0) -+ return err; -+ if (keys_cmp(c, &(*zn)->zbranch[*n].key, key)) { -+ /* See comments in 'resolve_collision()' */ -+ if (*n == (*zn)->child_cnt - 1) { -+ err = tnc_next(c, zn, n); -+ if (err) { -+ /* Should be impossible */ -+ ubifs_assert(0); -+ if (err == -ENOENT) -+ err = -EINVAL; -+ return err; -+ } -+ ubifs_assert(*n == 0); -+ *n = -1; -+ } -+ break; -+ } -+ err = fallible_matches_name(c, &(*zn)->zbranch[*n], nm); -+ if (err < 0) -+ return err; -+ if (err == NAME_MATCHES) -+ return 1; -+ if (err == NOT_ON_MEDIA) { -+ o_znode = *zn; -+ o_n = *n; -+ continue; -+ } -+ if (!adding) -+ continue; -+ if (err == NAME_LESS) -+ break; -+ else -+ unsure = 0; -+ } -+ } -+ -+ if (cmp == NAME_LESS || unsure) { -+ /* Look right */ -+ *zn = znode; -+ *n = nn; -+ while (1) { -+ err = tnc_next(c, &znode, &nn); -+ if (err == -ENOENT) -+ break; -+ if (err < 0) -+ return err; -+ if (keys_cmp(c, &znode->zbranch[nn].key, key)) -+ break; -+ err = fallible_matches_name(c, &znode->zbranch[nn], nm); -+ if (err < 0) -+ return err; -+ if (err == NAME_GREATER) -+ break; -+ *zn = znode; -+ *n = nn; -+ if (err == NAME_MATCHES) -+ return 1; -+ if (err == NOT_ON_MEDIA) { -+ o_znode = znode; -+ o_n = nn; -+ } -+ } -+ } -+ -+ /* Never match a dangling branch when adding */ -+ if (adding || !o_znode) -+ return 0; -+ -+ dbg_mnt("dangling match LEB %d:%d len %d %s", -+ o_znode->zbranch[o_n].lnum, o_znode->zbranch[o_n].offs, -+ o_znode->zbranch[o_n].len, DBGKEY(key)); -+ *zn = o_znode; -+ *n = o_n; -+ return 1; -+} -+ -+/** -+ * matches_position - determine if a zbranch matches a given position. 
-+ * @zbr: zbranch of dent -+ * @lnum: LEB number of dent to match -+ * @offs: offset of dent to match -+ * -+ * This function returns %1 if @lnum:@offs matches, and %0 otherwise. -+ */ -+static int matches_position(struct ubifs_zbranch *zbr, int lnum, int offs) -+{ -+ if (zbr->lnum == lnum && zbr->offs == offs) -+ return 1; -+ else -+ return 0; -+} -+ -+/** -+ * resolve_collision_directly - resolve a collision directly. -+ * @c: UBIFS file-system description object -+ * @key: key of directory entry -+ * @zn: znode is passed and returned here -+ * @n: zbranch number is passed and returned here -+ * @lnum: LEB number of dent node to match -+ * @offs: offset of dent node to match -+ * -+ * This function is used for "hashed" keys to make sure the found directory or -+ * extended attribute entry node is what was looked for. It is used when the -+ * flash address of the right node is known (@lnum:@offs) which makes it much -+ * easier to resolve collisions (no need to read entries and match full -+ * names). This function returns %1 and sets @zn and @n if the collision is -+ * resolved, %0 if @lnum:@offs is not found and @zn and @n are set to the -+ * previous directory entry. Otherwise a negative error code is returned. 
-+ */ -+static int resolve_collision_directly(struct ubifs_info *c, -+ const union ubifs_key *key, -+ struct ubifs_znode **zn, int *n, -+ int lnum, int offs) -+{ -+ struct ubifs_znode *znode; -+ int nn, err; -+ -+ znode = *zn; -+ nn = *n; -+ if (matches_position(&znode->zbranch[nn], lnum, offs)) -+ return 1; -+ -+ /* Look left */ -+ while (1) { -+ err = tnc_prev(c, &znode, &nn); -+ if (err == -ENOENT) -+ break; -+ if (err < 0) -+ return err; -+ if (keys_cmp(c, &znode->zbranch[nn].key, key)) -+ break; -+ if (matches_position(&znode->zbranch[nn], lnum, offs)) { -+ *zn = znode; -+ *n = nn; -+ return 1; -+ } -+ } -+ -+ /* Look right */ -+ znode = *zn; -+ nn = *n; -+ while (1) { -+ err = tnc_next(c, &znode, &nn); -+ if (err == -ENOENT) -+ return 0; -+ if (err < 0) -+ return err; -+ if (keys_cmp(c, &znode->zbranch[nn].key, key)) -+ return 0; -+ *zn = znode; -+ *n = nn; -+ if (matches_position(&znode->zbranch[nn], lnum, offs)) -+ return 1; -+ } -+} -+ -+/** -+ * dirty_cow_bottom_up - dirty a znode and its ancestors. -+ * @c: UBIFS file-system description object -+ * @znode: znode to dirty -+ * -+ * If we do not have a unique key that resides in a znode, then we cannot -+ * dirty that znode from the top down (i.e. by using lookup_level0_dirty) -+ * This function records the path back to the last dirty ancestor, and then -+ * dirties the znodes on that path. 
-+ */ -+static struct ubifs_znode *dirty_cow_bottom_up(struct ubifs_info *c, -+ struct ubifs_znode *znode) -+{ -+ struct ubifs_znode *zp; -+ int *path = c->bottom_up_buf, p = 0; -+ -+ ubifs_assert(c->zroot.znode); -+ ubifs_assert(znode); -+ if (c->zroot.znode->level > BOTTOM_UP_HEIGHT) { -+ kfree(c->bottom_up_buf); -+ c->bottom_up_buf = kmalloc(c->zroot.znode->level * sizeof(int), -+ GFP_NOFS); -+ if (!c->bottom_up_buf) -+ return ERR_PTR(-ENOMEM); -+ path = c->bottom_up_buf; -+ } -+ if (c->zroot.znode->level) { -+ /* Go up until parent is dirty */ -+ while (1) { -+ int n; -+ -+ zp = znode->parent; -+ if (!zp) -+ break; -+ n = znode->iip; -+ ubifs_assert(p < c->zroot.znode->level); -+ path[p++] = n; -+ if (!zp->cnext && ubifs_zn_dirty(znode)) -+ break; -+ znode = zp; -+ } -+ } -+ -+ /* Come back down, dirtying as we go */ -+ while (1) { -+ struct ubifs_zbranch *zbr; -+ -+ zp = znode->parent; -+ if (zp) { -+ ubifs_assert(path[p - 1] >= 0); -+ ubifs_assert(path[p - 1] < zp->child_cnt); -+ zbr = &zp->zbranch[path[--p]]; -+ znode = dirty_cow_znode(c, zbr); -+ } else { -+ ubifs_assert(znode == c->zroot.znode); -+ znode = dirty_cow_znode(c, &c->zroot); -+ } -+ if (IS_ERR(znode) || !p) -+ break; -+ ubifs_assert(path[p - 1] >= 0); -+ ubifs_assert(path[p - 1] < znode->child_cnt); -+ znode = znode->zbranch[path[p - 1]].znode; -+ } -+ -+ return znode; -+} -+ -+/** -+ * ubifs_lookup_level0 - search for zero-level znode. -+ * @c: UBIFS file-system description object -+ * @key: key to lookup -+ * @zn: znode is returned here -+ * @n: znode branch slot number is returned here -+ * -+ * This function looks up the TNC tree and search for zero-level znode which -+ * refers key @key. The found zero-level znode is returned in @zn. There are 3 -+ * cases: -+ * o exact match, i.e. 
the found zero-level znode contains key @key, then %1 -+ * is returned and slot number of the matched branch is stored in @n; -+ * o not exact match, which means that zero-level znode does not contain -+ * @key, then %0 is returned and slot number of the closed branch is stored -+ * in @n; -+ * o @key is so small that it is even less than the lowest key of the -+ * leftmost zero-level node, then %0 is returned and %0 is stored in @n. -+ * -+ * Note, when the TNC tree is traversed, some znodes may be absent, then this -+ * function reads corresponding indexing nodes and inserts them to TNC. In -+ * case of failure, a negative error code is returned. -+ */ -+int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, -+ struct ubifs_znode **zn, int *n) -+{ -+ int err, exact; -+ struct ubifs_znode *znode; -+ unsigned long time = get_seconds(); -+ -+ dbg_tnc("search key %s", DBGKEY(key)); -+ -+ znode = c->zroot.znode; -+ if (unlikely(!znode)) { -+ znode = ubifs_load_znode(c, &c->zroot, NULL, 0); -+ if (IS_ERR(znode)) -+ return PTR_ERR(znode); -+ } -+ -+ znode->time = time; -+ -+ while (1) { -+ struct ubifs_zbranch *zbr; -+ -+ exact = ubifs_search_zbranch(c, znode, key, n); -+ -+ if (znode->level == 0) -+ break; -+ -+ if (*n < 0) -+ *n = 0; -+ zbr = &znode->zbranch[*n]; -+ -+ if (zbr->znode) { -+ znode->time = time; -+ znode = zbr->znode; -+ continue; -+ } -+ -+ /* znode is not in TNC cache, load it from the media */ -+ znode = ubifs_load_znode(c, zbr, znode, *n); -+ if (IS_ERR(znode)) -+ return PTR_ERR(znode); -+ } -+ -+ *zn = znode; -+ if (exact || !is_hash_key(c, key) || *n != -1) { -+ dbg_tnc("found %d, lvl %d, n %d", exact, znode->level, *n); -+ return exact; -+ } -+ -+ /* -+ * Here is a tricky place. We have not found the key and this is a -+ * "hashed" key, which may collide. 
The rest of the code deals with -+ * situations like this: -+ * -+ * | 3 | 5 | -+ * / \ -+ * | 3 | 5 | | 6 | 7 | (x) -+ * -+ * Or more a complex example: -+ * -+ * | 1 | 5 | -+ * / \ -+ * | 1 | 3 | | 5 | 8 | -+ * \ / -+ * | 5 | 5 | | 6 | 7 | (x) -+ * -+ * In the examples, if we are looking for key "5", we may reach nodes -+ * marked with "(x)". In this case what we have do is to look at the -+ * left and see if there is "5" key there. If there is, we have to -+ * return it. -+ * -+ * Note, this whole situation is possible because we allow to have -+ * elements which are equivalent to the next key in the parent in the -+ * children of current znode. For example, this happens if we split a -+ * znode like this: | 3 | 5 | 5 | 6 | 7 |, which results in something -+ * like this: -+ * | 3 | 5 | -+ * / \ -+ * | 3 | 5 | | 5 | 6 | 7 | -+ * ^ -+ * And this becomes what is at the first "picture" after key "5" marked -+ * with "^" is removed. What could be done is we could prohibit -+ * splitting in the middle of the colliding sequence. Also, when -+ * removing the leftmost key, we would have to correct the key of the -+ * parent node, which would introduce additional complications. Namely, -+ * if we changed the the leftmost key of the parent znode, the garbage -+ * collector would be unable to find it (GC is doing this when GC'ing -+ * indexing LEBs). Although we already have an additional RB-tree where -+ * we save such changed znodes (see 'ins_clr_old_idx_znode()') until -+ * after the commit. But anyway, this does not look easy to implement -+ * so we did not try this. 
-+ */ -+ err = tnc_prev(c, &znode, n); -+ if (err == -ENOENT) { -+ dbg_tnc("found 0, lvl %d, n -1", znode->level); -+ *n = -1; -+ return 0; -+ } -+ if (unlikely(err < 0)) -+ return err; -+ if (keys_cmp(c, key, &znode->zbranch[*n].key)) { -+ dbg_tnc("found 0, lvl %d, n -1", znode->level); -+ *n = -1; -+ return 0; -+ } -+ -+ dbg_tnc("found 1, lvl %d, n %d", znode->level, *n); -+ *zn = znode; -+ return 1; -+} -+ -+/** -+ * lookup_level0_dirty - search for zero-level znode dirtying. -+ * @c: UBIFS file-system description object -+ * @key: key to lookup -+ * @zn: znode is returned here -+ * @n: znode branch slot number is returned here -+ * -+ * This function looks up the TNC tree and search for zero-level znode which -+ * refers key @key. The found zero-level znode is returned in @zn. There are 3 -+ * cases: -+ * o exact match, i.e. the found zero-level znode contains key @key, then %1 -+ * is returned and slot number of the matched branch is stored in @n; -+ * o not exact match, which means that zero-level znode does not contain @key -+ * then %0 is returned and slot number of the closed branch is stored in -+ * @n; -+ * o @key is so small that it is even less than the lowest key of the -+ * leftmost zero-level node, then %0 is returned and %-1 is stored in @n. -+ * -+ * Additionally all znodes in the path from the root to the located zero-level -+ * znode are marked as dirty. -+ * -+ * Note, when the TNC tree is traversed, some znodes may be absent, then this -+ * function reads corresponding indexing nodes and inserts them to TNC. In -+ * case of failure, a negative error code is returned. 
-+ */ -+static int lookup_level0_dirty(struct ubifs_info *c, const union ubifs_key *key, -+ struct ubifs_znode **zn, int *n) -+{ -+ int err, exact; -+ struct ubifs_znode *znode; -+ unsigned long time = get_seconds(); -+ -+ dbg_tnc("search and dirty key %s", DBGKEY(key)); -+ -+ znode = c->zroot.znode; -+ if (unlikely(!znode)) { -+ znode = ubifs_load_znode(c, &c->zroot, NULL, 0); -+ if (IS_ERR(znode)) -+ return PTR_ERR(znode); -+ } -+ -+ znode = dirty_cow_znode(c, &c->zroot); -+ if (IS_ERR(znode)) -+ return PTR_ERR(znode); -+ -+ znode->time = time; -+ -+ while (1) { -+ struct ubifs_zbranch *zbr; -+ -+ exact = ubifs_search_zbranch(c, znode, key, n); -+ -+ if (znode->level == 0) -+ break; -+ -+ if (*n < 0) -+ *n = 0; -+ zbr = &znode->zbranch[*n]; -+ -+ if (zbr->znode) { -+ znode->time = time; -+ znode = dirty_cow_znode(c, zbr); -+ if (IS_ERR(znode)) -+ return PTR_ERR(znode); -+ continue; -+ } -+ -+ /* znode is not in TNC cache, load it from the media */ -+ znode = ubifs_load_znode(c, zbr, znode, *n); -+ if (IS_ERR(znode)) -+ return PTR_ERR(znode); -+ znode = dirty_cow_znode(c, zbr); -+ if (IS_ERR(znode)) -+ return PTR_ERR(znode); -+ } -+ -+ *zn = znode; -+ if (exact || !is_hash_key(c, key) || *n != -1) { -+ dbg_tnc("found %d, lvl %d, n %d", exact, znode->level, *n); -+ return exact; -+ } -+ -+ /* -+ * See huge comment at 'lookup_level0_dirty()' what is the rest of the -+ * code. 
-+ */ -+ err = tnc_prev(c, &znode, n); -+ if (err == -ENOENT) { -+ *n = -1; -+ dbg_tnc("found 0, lvl %d, n -1", znode->level); -+ return 0; -+ } -+ if (unlikely(err < 0)) -+ return err; -+ if (keys_cmp(c, key, &znode->zbranch[*n].key)) { -+ *n = -1; -+ dbg_tnc("found 0, lvl %d, n -1", znode->level); -+ return 0; -+ } -+ -+ if (znode->cnext || !ubifs_zn_dirty(znode)) { -+ znode = dirty_cow_bottom_up(c, znode); -+ if (IS_ERR(znode)) -+ return PTR_ERR(znode); -+ } -+ -+ dbg_tnc("found 1, lvl %d, n %d", znode->level, *n); -+ *zn = znode; -+ return 1; -+} -+ -+/** -+ * maybe_leb_gced - determine if a LEB may have been garbage collected. -+ * @c: UBIFS file-system description object -+ * @lnum: LEB number -+ * @gc_seq1: garbage collection sequence number -+ * -+ * This function determines if @lnum may have been garbage collected since -+ * sequence number @gc_seq1. If it may have been then %1 is returned, otherwise -+ * %0 is returned. -+ */ -+static int maybe_leb_gced(struct ubifs_info *c, int lnum, int gc_seq1) -+{ -+ int gc_seq2, gced_lnum; -+ -+ gced_lnum = c->gced_lnum; -+ smp_rmb(); -+ gc_seq2 = c->gc_seq; -+ /* Same seq means no GC */ -+ if (gc_seq1 == gc_seq2) -+ return 0; -+ /* Different by more than 1 means we don't know */ -+ if (gc_seq1 + 1 != gc_seq2) -+ return 1; -+ /* -+ * We have seen the sequence number has increased by 1. Now we need to -+ * be sure we read the right LEB number, so read it again. -+ */ -+ smp_rmb(); -+ if (gced_lnum != c->gced_lnum) -+ return 1; -+ /* Finally we can check lnum */ -+ if (gced_lnum == lnum) -+ return 1; -+ return 0; -+} -+ -+/** -+ * ubifs_tnc_locate - look up a file-system node and return it and its location. -+ * @c: UBIFS file-system description object -+ * @key: node key to lookup -+ * @node: the node is returned here -+ * @lnum: LEB number is returned here -+ * @offs: offset is returned here -+ * -+ * This function look up and reads node with key @key. 
The caller has to make -+ * sure the @node buffer is large enough to fit the node. Returns zero in case -+ * of success, %-ENOENT if the node was not found, and a negative error code in -+ * case of failure. The node location can be returned in @lnum and @offs. -+ */ -+int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, -+ void *node, int *lnum, int *offs) -+{ -+ int found, n, err, safely = 0, gc_seq1; -+ struct ubifs_znode *znode; -+ struct ubifs_zbranch zbr, *zt; -+ -+again: -+ mutex_lock(&c->tnc_mutex); -+ found = ubifs_lookup_level0(c, key, &znode, &n); -+ if (!found) { -+ err = -ENOENT; -+ goto out; -+ } else if (found < 0) { -+ err = found; -+ goto out; -+ } -+ zt = &znode->zbranch[n]; -+ if (lnum) { -+ *lnum = zt->lnum; -+ *offs = zt->offs; -+ } -+ if (is_hash_key(c, key)) { -+ /* -+ * In this case the leaf node cache gets used, so we pass the -+ * address of the zbranch and keep the mutex locked -+ */ -+ err = tnc_read_node_nm(c, zt, node); -+ goto out; -+ } -+ if (safely) { -+ err = ubifs_tnc_read_node(c, zt, node); -+ goto out; -+ } -+ /* Drop the TNC mutex prematurely and race with garbage collection */ -+ zbr = znode->zbranch[n]; -+ gc_seq1 = c->gc_seq; -+ mutex_unlock(&c->tnc_mutex); -+ -+ if (ubifs_get_wbuf(c, zbr.lnum)) { -+ /* We do not GC journal heads */ -+ err = ubifs_tnc_read_node(c, &zbr, node); -+ return err; -+ } -+ -+ err = fallible_read_node(c, key, &zbr, node); -+ if (err <= 0 || maybe_leb_gced(c, zbr.lnum, gc_seq1)) { -+ /* -+ * The node may have been GC'ed out from under us so try again -+ * while keeping the TNC mutex locked. -+ */ -+ safely = 1; -+ goto again; -+ } -+ return 0; -+ -+out: -+ mutex_unlock(&c->tnc_mutex); -+ return err; -+} -+ -+/** -+ * ubifs_tnc_get_bu_keys - lookup keys for bulk-read. -+ * @c: UBIFS file-system description object -+ * @bu: bulk-read parameters and results -+ * -+ * Lookup consecutive data node keys for the same inode that reside -+ * consecutively in the same LEB. 
This function returns zero in case of success -+ * and a negative error code in case of failure. -+ * -+ * Note, if the bulk-read buffer length (@bu->buf_len) is known, this function -+ * makes sure bulk-read nodes fit the buffer. Otherwise, this function prepares -+ * maximum possible amount of nodes for bulk-read. -+ */ -+int ubifs_tnc_get_bu_keys(struct ubifs_info *c, struct bu_info *bu) -+{ -+ int n, err = 0, lnum = -1, uninitialized_var(offs); -+ int uninitialized_var(len); -+ unsigned int block = key_block(c, &bu->key); -+ struct ubifs_znode *znode; -+ -+ bu->cnt = 0; -+ bu->blk_cnt = 0; -+ bu->eof = 0; -+ -+ mutex_lock(&c->tnc_mutex); -+ /* Find first key */ -+ err = ubifs_lookup_level0(c, &bu->key, &znode, &n); -+ if (err < 0) -+ goto out; -+ if (err) { -+ /* Key found */ -+ len = znode->zbranch[n].len; -+ /* The buffer must be big enough for at least 1 node */ -+ if (len > bu->buf_len) { -+ err = -EINVAL; -+ goto out; -+ } -+ /* Add this key */ -+ bu->zbranch[bu->cnt++] = znode->zbranch[n]; -+ bu->blk_cnt += 1; -+ lnum = znode->zbranch[n].lnum; -+ offs = ALIGN(znode->zbranch[n].offs + len, 8); -+ } -+ while (1) { -+ struct ubifs_zbranch *zbr; -+ union ubifs_key *key; -+ unsigned int next_block; -+ -+ /* Find next key */ -+ err = tnc_next(c, &znode, &n); -+ if (err) -+ goto out; -+ zbr = &znode->zbranch[n]; -+ key = &zbr->key; -+ /* See if there is another data key for this file */ -+ if (key_inum(c, key) != key_inum(c, &bu->key) || -+ key_type(c, key) != UBIFS_DATA_KEY) { -+ err = -ENOENT; -+ goto out; -+ } -+ if (lnum < 0) { -+ /* First key found */ -+ lnum = zbr->lnum; -+ offs = ALIGN(zbr->offs + zbr->len, 8); -+ len = zbr->len; -+ if (len > bu->buf_len) { -+ err = -EINVAL; -+ goto out; -+ } -+ } else { -+ /* -+ * The data nodes must be in consecutive positions in -+ * the same LEB. 
-+ */ -+ if (zbr->lnum != lnum || zbr->offs != offs) -+ goto out; -+ offs += ALIGN(zbr->len, 8); -+ len = ALIGN(len, 8) + zbr->len; -+ /* Must not exceed buffer length */ -+ if (len > bu->buf_len) -+ goto out; -+ } -+ /* Allow for holes */ -+ next_block = key_block(c, key); -+ bu->blk_cnt += (next_block - block - 1); -+ if (bu->blk_cnt >= UBIFS_MAX_BULK_READ) -+ goto out; -+ block = next_block; -+ /* Add this key */ -+ bu->zbranch[bu->cnt++] = *zbr; -+ bu->blk_cnt += 1; -+ /* See if we have room for more */ -+ if (bu->cnt >= UBIFS_MAX_BULK_READ) -+ goto out; -+ if (bu->blk_cnt >= UBIFS_MAX_BULK_READ) -+ goto out; -+ } -+out: -+ if (err == -ENOENT) { -+ bu->eof = 1; -+ err = 0; -+ } -+ bu->gc_seq = c->gc_seq; -+ mutex_unlock(&c->tnc_mutex); -+ if (err) -+ return err; -+ /* -+ * An enormous hole could cause bulk-read to encompass too many -+ * page cache pages, so limit the number here. -+ */ -+ if (bu->blk_cnt > UBIFS_MAX_BULK_READ) -+ bu->blk_cnt = UBIFS_MAX_BULK_READ; -+ /* -+ * Ensure that bulk-read covers a whole number of page cache -+ * pages. -+ */ -+ if (UBIFS_BLOCKS_PER_PAGE == 1 || -+ !(bu->blk_cnt & (UBIFS_BLOCKS_PER_PAGE - 1))) -+ return 0; -+ if (bu->eof) { -+ /* At the end of file we can round up */ -+ bu->blk_cnt += UBIFS_BLOCKS_PER_PAGE - 1; -+ return 0; -+ } -+ /* Exclude data nodes that do not make up a whole page cache page */ -+ block = key_block(c, &bu->key) + bu->blk_cnt; -+ block &= ~(UBIFS_BLOCKS_PER_PAGE - 1); -+ while (bu->cnt) { -+ if (key_block(c, &bu->zbranch[bu->cnt - 1].key) < block) -+ break; -+ bu->cnt -= 1; -+ } -+ return 0; -+} -+ -+/** -+ * read_wbuf - bulk-read from a LEB with a wbuf. -+ * @wbuf: wbuf that may overlap the read -+ * @buf: buffer into which to read -+ * @len: read length -+ * @lnum: LEB number from which to read -+ * @offs: offset from which to read -+ * -+ * This functions returns %0 on success or a negative error code on failure. 
-+ */ -+static int read_wbuf(struct ubifs_wbuf *wbuf, void *buf, int len, int lnum, -+ int offs) -+{ -+ const struct ubifs_info *c = wbuf->c; -+ int rlen, overlap; -+ -+ dbg_io("LEB %d:%d, length %d", lnum, offs, len); -+ ubifs_assert(wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0); -+ ubifs_assert(!(offs & 7) && offs < c->leb_size); -+ ubifs_assert(offs + len <= c->leb_size); -+ -+ spin_lock(&wbuf->lock); -+ overlap = (lnum == wbuf->lnum && offs + len > wbuf->offs); -+ if (!overlap) { -+ /* We may safely unlock the write-buffer and read the data */ -+ spin_unlock(&wbuf->lock); -+ return ubi_read(c->ubi, lnum, buf, offs, len); -+ } -+ -+ /* Don't read under wbuf */ -+ rlen = wbuf->offs - offs; -+ if (rlen < 0) -+ rlen = 0; -+ -+ /* Copy the rest from the write-buffer */ -+ memcpy(buf + rlen, wbuf->buf + offs + rlen - wbuf->offs, len - rlen); -+ spin_unlock(&wbuf->lock); -+ -+ if (rlen > 0) -+ /* Read everything that goes before write-buffer */ -+ return ubi_read(c->ubi, lnum, buf, offs, rlen); -+ -+ return 0; -+} -+ -+/** -+ * validate_data_node - validate data nodes for bulk-read. -+ * @c: UBIFS file-system description object -+ * @buf: buffer containing data node to validate -+ * @zbr: zbranch of data node to validate -+ * -+ * This functions returns %0 on success or a negative error code on failure. 
-+ */ -+static int validate_data_node(struct ubifs_info *c, void *buf, -+ struct ubifs_zbranch *zbr) -+{ -+ union ubifs_key key1; -+ struct ubifs_ch *ch = buf; -+ int err, len; -+ -+ if (ch->node_type != UBIFS_DATA_NODE) { -+ ubifs_err("bad node type (%d but expected %d)", -+ ch->node_type, UBIFS_DATA_NODE); -+ goto out_err; -+ } -+ -+ err = ubifs_check_node(c, buf, zbr->lnum, zbr->offs, 0, 0); -+ if (err) { -+ ubifs_err("expected node type %d", UBIFS_DATA_NODE); -+ goto out; -+ } -+ -+ len = le32_to_cpu(ch->len); -+ if (len != zbr->len) { -+ ubifs_err("bad node length %d, expected %d", len, zbr->len); -+ goto out_err; -+ } -+ -+ /* Make sure the key of the read node is correct */ -+ key_read(c, buf + UBIFS_KEY_OFFSET, &key1); -+ if (!keys_eq(c, &zbr->key, &key1)) { -+ ubifs_err("bad key in node at LEB %d:%d", -+ zbr->lnum, zbr->offs); -+ dbg_tnc("looked for key %s found node's key %s", -+ DBGKEY(&zbr->key), DBGKEY1(&key1)); -+ goto out_err; -+ } -+ -+ return 0; -+ -+out_err: -+ err = -EINVAL; -+out: -+ ubifs_err("bad node at LEB %d:%d", zbr->lnum, zbr->offs); -+ dbg_dump_node(c, buf); -+ dbg_dump_stack(); -+ return err; -+} -+ -+/** -+ * ubifs_tnc_bulk_read - read a number of data nodes in one go. -+ * @c: UBIFS file-system description object -+ * @bu: bulk-read parameters and results -+ * -+ * This functions reads and validates the data nodes that were identified by the -+ * 'ubifs_tnc_get_bu_keys()' function. This functions returns %0 on success, -+ * -EAGAIN to indicate a race with GC, or another negative error code on -+ * failure. 
-+ */ -+int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu) -+{ -+ int lnum = bu->zbranch[0].lnum, offs = bu->zbranch[0].offs, len, err, i; -+ struct ubifs_wbuf *wbuf; -+ void *buf; -+ -+ len = bu->zbranch[bu->cnt - 1].offs; -+ len += bu->zbranch[bu->cnt - 1].len - offs; -+ if (len > bu->buf_len) { -+ ubifs_err("buffer too small %d vs %d", bu->buf_len, len); -+ return -EINVAL; -+ } -+ -+ /* Do the read */ -+ wbuf = ubifs_get_wbuf(c, lnum); -+ if (wbuf) -+ err = read_wbuf(wbuf, bu->buf, len, lnum, offs); -+ else -+ err = ubi_read(c->ubi, lnum, bu->buf, offs, len); -+ -+ /* Check for a race with GC */ -+ if (maybe_leb_gced(c, lnum, bu->gc_seq)) -+ return -EAGAIN; -+ -+ if (err && err != -EBADMSG) { -+ ubifs_err("failed to read from LEB %d:%d, error %d", -+ lnum, offs, err); -+ dbg_dump_stack(); -+ dbg_tnc("key %s", DBGKEY(&bu->key)); -+ return err; -+ } -+ -+ /* Validate the nodes read */ -+ buf = bu->buf; -+ for (i = 0; i < bu->cnt; i++) { -+ err = validate_data_node(c, buf, &bu->zbranch[i]); -+ if (err) -+ return err; -+ buf = buf + ALIGN(bu->zbranch[i].len, 8); -+ } -+ -+ return 0; -+} -+ -+/** -+ * do_lookup_nm- look up a "hashed" node. -+ * @c: UBIFS file-system description object -+ * @key: node key to lookup -+ * @node: the node is returned here -+ * @nm: node name -+ * -+ * This function look up and reads a node which contains name hash in the key. -+ * Since the hash may have collisions, there may be many nodes with the same -+ * key, so we have to sequentially look to all of them until the needed one is -+ * found. This function returns zero in case of success, %-ENOENT if the node -+ * was not found, and a negative error code in case of failure. 
-+ */ -+static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, -+ void *node, const struct qstr *nm) -+{ -+ int found, n, err; -+ struct ubifs_znode *znode; -+ -+ dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key)); -+ mutex_lock(&c->tnc_mutex); -+ found = ubifs_lookup_level0(c, key, &znode, &n); -+ if (!found) { -+ err = -ENOENT; -+ goto out_unlock; -+ } else if (found < 0) { -+ err = found; -+ goto out_unlock; -+ } -+ -+ ubifs_assert(n >= 0); -+ -+ err = resolve_collision(c, key, &znode, &n, nm); -+ dbg_tnc("rc returned %d, znode %p, n %d", err, znode, n); -+ if (unlikely(err < 0)) -+ goto out_unlock; -+ if (err == 0) { -+ err = -ENOENT; -+ goto out_unlock; -+ } -+ -+ err = tnc_read_node_nm(c, &znode->zbranch[n], node); -+ -+out_unlock: -+ mutex_unlock(&c->tnc_mutex); -+ return err; -+} -+ -+/** -+ * ubifs_tnc_lookup_nm - look up a "hashed" node. -+ * @c: UBIFS file-system description object -+ * @key: node key to lookup -+ * @node: the node is returned here -+ * @nm: node name -+ * -+ * This function look up and reads a node which contains name hash in the key. -+ * Since the hash may have collisions, there may be many nodes with the same -+ * key, so we have to sequentially look to all of them until the needed one is -+ * found. This function returns zero in case of success, %-ENOENT if the node -+ * was not found, and a negative error code in case of failure. -+ */ -+int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, -+ void *node, const struct qstr *nm) -+{ -+ int err, len; -+ const struct ubifs_dent_node *dent = node; -+ -+ /* -+ * We assume that in most of the cases there are no name collisions and -+ * 'ubifs_tnc_lookup()' returns us the right direntry. 
-+ */ -+ err = ubifs_tnc_lookup(c, key, node); -+ if (err) -+ return err; -+ -+ len = le16_to_cpu(dent->nlen); -+ if (nm->len == len && !memcmp(dent->name, nm->name, len)) -+ return 0; -+ -+ /* -+ * Unluckily, there are hash collisions and we have to iterate over -+ * them look at each direntry with colliding name hash sequentially. -+ */ -+ return do_lookup_nm(c, key, node, nm); -+} -+ -+/** -+ * correct_parent_keys - correct parent znodes' keys. -+ * @c: UBIFS file-system description object -+ * @znode: znode to correct parent znodes for -+ * -+ * This is a helper function for 'tnc_insert()'. When the key of the leftmost -+ * zbranch changes, keys of parent znodes have to be corrected. This helper -+ * function is called in such situations and corrects the keys if needed. -+ */ -+static void correct_parent_keys(const struct ubifs_info *c, -+ struct ubifs_znode *znode) -+{ -+ union ubifs_key *key, *key1; -+ -+ ubifs_assert(znode->parent); -+ ubifs_assert(znode->iip == 0); -+ -+ key = &znode->zbranch[0].key; -+ key1 = &znode->parent->zbranch[0].key; -+ -+ while (keys_cmp(c, key, key1) < 0) { -+ key_copy(c, key, key1); -+ znode = znode->parent; -+ znode->alt = 1; -+ if (!znode->parent || znode->iip) -+ break; -+ key1 = &znode->parent->zbranch[0].key; -+ } -+} -+ -+/** -+ * insert_zbranch - insert a zbranch into a znode. -+ * @znode: znode into which to insert -+ * @zbr: zbranch to insert -+ * @n: slot number to insert to -+ * -+ * This is a helper function for 'tnc_insert()'. UBIFS does not allow "gaps" in -+ * znode's array of zbranches and keeps zbranches consolidated, so when a new -+ * zbranch has to be inserted to the @znode->zbranches[]' array at the @n-th -+ * slot, zbranches starting from @n have to be moved right. 
-+ */ -+static void insert_zbranch(struct ubifs_znode *znode, -+ const struct ubifs_zbranch *zbr, int n) -+{ -+ int i; -+ -+ ubifs_assert(ubifs_zn_dirty(znode)); -+ -+ if (znode->level) { -+ for (i = znode->child_cnt; i > n; i--) { -+ znode->zbranch[i] = znode->zbranch[i - 1]; -+ if (znode->zbranch[i].znode) -+ znode->zbranch[i].znode->iip = i; -+ } -+ if (zbr->znode) -+ zbr->znode->iip = n; -+ } else -+ for (i = znode->child_cnt; i > n; i--) -+ znode->zbranch[i] = znode->zbranch[i - 1]; -+ -+ znode->zbranch[n] = *zbr; -+ znode->child_cnt += 1; -+ -+ /* -+ * After inserting at slot zero, the lower bound of the key range of -+ * this znode may have changed. If this znode is subsequently split -+ * then the upper bound of the key range may change, and furthermore -+ * it could change to be lower than the original lower bound. If that -+ * happens, then it will no longer be possible to find this znode in the -+ * TNC using the key from the index node on flash. That is bad because -+ * if it is not found, we will assume it is obsolete and may overwrite -+ * it. Then if there is an unclean unmount, we will start using the -+ * old index which will be broken. -+ * -+ * So we first mark znodes that have insertions at slot zero, and then -+ * if they are split we add their lnum/offs to the old_idx tree. -+ */ -+ if (n == 0) -+ znode->alt = 1; -+} -+ -+/** -+ * tnc_insert - insert a node into TNC. -+ * @c: UBIFS file-system description object -+ * @znode: znode to insert into -+ * @zbr: branch to insert -+ * @n: slot number to insert new zbranch to -+ * -+ * This function inserts a new node described by @zbr into znode @znode. If -+ * znode does not have a free slot for new zbranch, it is split. Parent znodes -+ * are splat as well if needed. Returns zero in case of success or a negative -+ * error code in case of failure. 
-+ */ -+static int tnc_insert(struct ubifs_info *c, struct ubifs_znode *znode, -+ struct ubifs_zbranch *zbr, int n) -+{ -+ struct ubifs_znode *zn, *zi, *zp; -+ int i, keep, move, appending = 0; -+ union ubifs_key *key = &zbr->key, *key1; -+ -+ ubifs_assert(n >= 0 && n <= c->fanout); -+ -+ /* Implement naive insert for now */ -+again: -+ zp = znode->parent; -+ if (znode->child_cnt < c->fanout) { -+ ubifs_assert(n != c->fanout); -+ dbg_tnc("inserted at %d level %d, key %s", n, znode->level, -+ DBGKEY(key)); -+ -+ insert_zbranch(znode, zbr, n); -+ -+ /* Ensure parent's key is correct */ -+ if (n == 0 && zp && znode->iip == 0) -+ correct_parent_keys(c, znode); -+ -+ return 0; -+ } -+ -+ /* -+ * Unfortunately, @znode does not have more empty slots and we have to -+ * split it. -+ */ -+ dbg_tnc("splitting level %d, key %s", znode->level, DBGKEY(key)); -+ -+ if (znode->alt) -+ /* -+ * We can no longer be sure of finding this znode by key, so we -+ * record it in the old_idx tree. -+ */ -+ ins_clr_old_idx_znode(c, znode); -+ -+ zn = kzalloc(c->max_znode_sz, GFP_NOFS); -+ if (!zn) -+ return -ENOMEM; -+ zn->parent = zp; -+ zn->level = znode->level; -+ -+ /* Decide where to split */ -+ if (znode->level == 0 && key_type(c, key) == UBIFS_DATA_KEY) { -+ /* Try not to split consecutive data keys */ -+ if (n == c->fanout) { -+ key1 = &znode->zbranch[n - 1].key; -+ if (key_inum(c, key1) == key_inum(c, key) && -+ key_type(c, key1) == UBIFS_DATA_KEY) -+ appending = 1; -+ } else -+ goto check_split; -+ } else if (appending && n != c->fanout) { -+ /* Try not to split consecutive data keys */ -+ appending = 0; -+check_split: -+ if (n >= (c->fanout + 1) / 2) { -+ key1 = &znode->zbranch[0].key; -+ if (key_inum(c, key1) == key_inum(c, key) && -+ key_type(c, key1) == UBIFS_DATA_KEY) { -+ key1 = &znode->zbranch[n].key; -+ if (key_inum(c, key1) != key_inum(c, key) || -+ key_type(c, key1) != UBIFS_DATA_KEY) { -+ keep = n; -+ move = c->fanout - keep; -+ zi = znode; -+ goto do_split; -+ } -+ } 
-+ } -+ } -+ -+ if (appending) { -+ keep = c->fanout; -+ move = 0; -+ } else { -+ keep = (c->fanout + 1) / 2; -+ move = c->fanout - keep; -+ } -+ -+ /* -+ * Although we don't at present, we could look at the neighbors and see -+ * if we can move some zbranches there. -+ */ -+ -+ if (n < keep) { -+ /* Insert into existing znode */ -+ zi = znode; -+ move += 1; -+ keep -= 1; -+ } else { -+ /* Insert into new znode */ -+ zi = zn; -+ n -= keep; -+ /* Re-parent */ -+ if (zn->level != 0) -+ zbr->znode->parent = zn; -+ } -+ -+do_split: -+ -+ __set_bit(DIRTY_ZNODE, &zn->flags); -+ atomic_long_inc(&c->dirty_zn_cnt); -+ -+ zn->child_cnt = move; -+ znode->child_cnt = keep; -+ -+ dbg_tnc("moving %d, keeping %d", move, keep); -+ -+ /* Move zbranch */ -+ for (i = 0; i < move; i++) { -+ zn->zbranch[i] = znode->zbranch[keep + i]; -+ /* Re-parent */ -+ if (zn->level != 0) -+ if (zn->zbranch[i].znode) { -+ zn->zbranch[i].znode->parent = zn; -+ zn->zbranch[i].znode->iip = i; -+ } -+ } -+ -+ /* Insert new key and branch */ -+ dbg_tnc("inserting at %d level %d, key %s", n, zn->level, DBGKEY(key)); -+ -+ insert_zbranch(zi, zbr, n); -+ -+ /* Insert new znode (produced by splitting) into the parent */ -+ if (zp) { -+ if (n == 0 && zi == znode && znode->iip == 0) -+ correct_parent_keys(c, znode); -+ -+ /* Locate insertion point */ -+ n = znode->iip + 1; -+ -+ /* Tail recursion: re-use @zbr as a branch to @zn and insert it into @zp */ -+ zbr->key = zn->zbranch[0].key; -+ zbr->znode = zn; -+ zbr->lnum = 0; -+ zbr->offs = 0; -+ zbr->len = 0; -+ znode = zp; -+ -+ goto again; -+ } -+ -+ /* We have to split root znode */ -+ dbg_tnc("creating new zroot at level %d", znode->level + 1); -+ -+ zi = kzalloc(c->max_znode_sz, GFP_NOFS); -+ if (!zi) -+ return -ENOMEM; -+ -+ zi->child_cnt = 2; -+ zi->level = znode->level + 1; -+ -+ __set_bit(DIRTY_ZNODE, &zi->flags); -+ atomic_long_inc(&c->dirty_zn_cnt); -+ -+ zi->zbranch[0].key = znode->zbranch[0].key; -+ zi->zbranch[0].znode = znode; -+ zi->zbranch[0].lnum = c->zroot.lnum; -+ zi->zbranch[0].offs = 
c->zroot.offs; -+ zi->zbranch[0].len = c->zroot.len; -+ zi->zbranch[1].key = zn->zbranch[0].key; -+ zi->zbranch[1].znode = zn; -+ -+ c->zroot.lnum = 0; -+ c->zroot.offs = 0; -+ c->zroot.len = 0; -+ c->zroot.znode = zi; -+ -+ zn->parent = zi; -+ zn->iip = 1; -+ znode->parent = zi; -+ znode->iip = 0; -+ -+ return 0; -+} -+ -+/** -+ * ubifs_tnc_add - add a node to TNC. -+ * @c: UBIFS file-system description object -+ * @key: key to add -+ * @lnum: LEB number of node -+ * @offs: node offset -+ * @len: node length -+ * -+ * This function adds a node with key @key to TNC. The node may be new or it may -+ * obsolete some existing one. Returns %0 on success or negative error code on -+ * failure. -+ */ -+int ubifs_tnc_add(struct ubifs_info *c, const union ubifs_key *key, int lnum, -+ int offs, int len) -+{ -+ int found, n, err = 0; -+ struct ubifs_znode *znode; -+ -+ mutex_lock(&c->tnc_mutex); -+ dbg_tnc("%d:%d, len %d, key %s", lnum, offs, len, DBGKEY(key)); -+ found = lookup_level0_dirty(c, key, &znode, &n); -+ if (!found) { -+ struct ubifs_zbranch zbr; -+ -+ zbr.znode = NULL; -+ zbr.lnum = lnum; -+ zbr.offs = offs; -+ zbr.len = len; -+ key_copy(c, key, &zbr.key); -+ err = tnc_insert(c, znode, &zbr, n + 1); -+ } else if (found == 1) { -+ struct ubifs_zbranch *zbr = &znode->zbranch[n]; -+ -+ lnc_free(zbr); -+ err = ubifs_add_dirt(c, zbr->lnum, zbr->len); -+ zbr->lnum = lnum; -+ zbr->offs = offs; -+ zbr->len = len; -+ } else -+ err = found; -+ if (!err) -+ err = dbg_check_tnc(c, 0); -+ mutex_unlock(&c->tnc_mutex); -+ -+ return err; -+} -+ -+/** -+ * ubifs_tnc_replace - replace a node in the TNC only if the old node is found. -+ * @c: UBIFS file-system description object -+ * @key: key to add -+ * @old_lnum: LEB number of old node -+ * @old_offs: old node offset -+ * @lnum: LEB number of node -+ * @offs: node offset -+ * @len: node length -+ * -+ * This function replaces a node with key @key in the TNC only if the old node -+ * is found. 
This function is called by garbage collection when nodes are moved. -+ * If the old node is not found at @old_lnum:@old_offs, no zbranch is updated -+ * and 'ubifs_add_dirt()' is called for the new node (@lnum, @len) instead. -+ * Returns %0 on success or negative error code on failure. -+ */ -+int ubifs_tnc_replace(struct ubifs_info *c, const union ubifs_key *key, -+ int old_lnum, int old_offs, int lnum, int offs, int len) -+{ -+ int found, n, err = 0; -+ struct ubifs_znode *znode; -+ -+ mutex_lock(&c->tnc_mutex); -+ dbg_tnc("old LEB %d:%d, new LEB %d:%d, len %d, key %s", old_lnum, -+ old_offs, lnum, offs, len, DBGKEY(key)); -+ found = lookup_level0_dirty(c, key, &znode, &n); -+ if (found < 0) { -+ err = found; -+ goto out_unlock; -+ } -+ -+ if (found == 1) { -+ struct ubifs_zbranch *zbr = &znode->zbranch[n]; -+ -+ found = 0; -+ if (zbr->lnum == old_lnum && zbr->offs == old_offs) { -+ lnc_free(zbr); -+ err = ubifs_add_dirt(c, zbr->lnum, zbr->len); -+ if (err) -+ goto out_unlock; -+ zbr->lnum = lnum; -+ zbr->offs = offs; -+ zbr->len = len; -+ found = 1; -+ } else if (is_hash_key(c, key)) { -+ found = resolve_collision_directly(c, key, &znode, &n, -+ old_lnum, old_offs); -+ dbg_tnc("rc returned %d, znode %p, n %d, LEB %d:%d", -+ found, znode, n, old_lnum, old_offs); -+ if (found < 0) { -+ err = found; -+ goto out_unlock; -+ } -+ -+ if (found) { -+ /* Ensure the znode is dirtied */ -+ if (znode->cnext || !ubifs_zn_dirty(znode)) { -+ znode = dirty_cow_bottom_up(c, znode); -+ if (IS_ERR(znode)) { -+ err = PTR_ERR(znode); -+ goto out_unlock; -+ } -+ } -+ zbr = &znode->zbranch[n]; -+ lnc_free(zbr); -+ err = ubifs_add_dirt(c, zbr->lnum, -+ zbr->len); -+ if (err) -+ goto out_unlock; -+ zbr->lnum = lnum; -+ zbr->offs = offs; -+ zbr->len = len; -+ } -+ } -+ } -+ -+ if (!found) -+ err = ubifs_add_dirt(c, lnum, len); -+ -+ if (!err) -+ err = dbg_check_tnc(c, 0); -+ -+out_unlock: -+ mutex_unlock(&c->tnc_mutex); -+ return err; -+} -+ -+/** -+ * ubifs_tnc_add_nm - add a "hashed" node to TNC. 
-+ * @c: UBIFS file-system description object -+ * @key: key to add -+ * @lnum: LEB number of node -+ * @offs: node offset -+ * @len: node length -+ * @nm: node name -+ * -+ * This is the same as 'ubifs_tnc_add()' but it should be used with keys which -+ * may have collisions, like directory entry keys. Returns %0 on success or -+ * negative error code on failure. -+ */ -+int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key, -+ int lnum, int offs, int len, const struct qstr *nm) -+{ -+ int found, n, err = 0; -+ struct ubifs_znode *znode; -+ -+ mutex_lock(&c->tnc_mutex); -+ dbg_tnc("LEB %d:%d, name '%.*s', key %s", lnum, offs, nm->len, nm->name, -+ DBGKEY(key)); -+ found = lookup_level0_dirty(c, key, &znode, &n); -+ if (found < 0) { -+ err = found; -+ goto out_unlock; -+ } -+ -+ if (found == 1) { -+ if (c->replaying) -+ found = fallible_resolve_collision(c, key, &znode, &n, -+ nm, 1); -+ else -+ found = resolve_collision(c, key, &znode, &n, nm); -+ dbg_tnc("rc returned %d, znode %p, n %d", found, znode, n); -+ if (found < 0) { -+ err = found; -+ goto out_unlock; -+ } -+ -+ /* Ensure the znode is dirtied */ -+ if (znode->cnext || !ubifs_zn_dirty(znode)) { -+ znode = dirty_cow_bottom_up(c, znode); -+ if (IS_ERR(znode)) { -+ err = PTR_ERR(znode); -+ goto out_unlock; -+ } -+ } -+ -+ if (found == 1) { -+ struct ubifs_zbranch *zbr = &znode->zbranch[n]; -+ -+ lnc_free(zbr); -+ err = ubifs_add_dirt(c, zbr->lnum, zbr->len); -+ zbr->lnum = lnum; -+ zbr->offs = offs; -+ zbr->len = len; -+ goto out_unlock; -+ } -+ } -+ -+ if (!found) { -+ struct ubifs_zbranch zbr; -+ -+ zbr.znode = NULL; -+ zbr.lnum = lnum; -+ zbr.offs = offs; -+ zbr.len = len; -+ key_copy(c, key, &zbr.key); -+ err = tnc_insert(c, znode, &zbr, n + 1); -+ if (err) -+ goto out_unlock; -+ if (c->replaying) { -+ /* -+ * We did not find it in the index so there may be a -+ * dangling branch still in the index. So we remove it -+ * by passing 'ubifs_tnc_remove_nm()' the same key but -+ * an unmatchable name. 
-+ */ -+ struct qstr noname = { .len = 0, .name = "" }; -+ -+ err = dbg_check_tnc(c, 0); -+ mutex_unlock(&c->tnc_mutex); -+ if (err) -+ return err; -+ return ubifs_tnc_remove_nm(c, key, &noname); -+ } -+ } -+ -+out_unlock: -+ if (!err) -+ err = dbg_check_tnc(c, 0); -+ mutex_unlock(&c->tnc_mutex); -+ return err; -+} -+ -+/** -+ * tnc_delete - delete a znode from TNC. -+ * @c: UBIFS file-system description object -+ * @znode: znode to delete from -+ * @n: zbranch slot number to delete -+ * -+ * This function deletes a leaf node from @n-th slot of @znode. Returns zero in -+ * case of success and a negative error code in case of failure. -+ */ -+static int tnc_delete(struct ubifs_info *c, struct ubifs_znode *znode, int n) -+{ -+ struct ubifs_zbranch *zbr; -+ struct ubifs_znode *zp; -+ int i, err; -+ -+ /* Delete without merge for now */ -+ ubifs_assert(znode->level == 0); -+ ubifs_assert(n >= 0 && n < c->fanout); -+ dbg_tnc("deleting %s", DBGKEY(&znode->zbranch[n].key)); -+ -+ zbr = &znode->zbranch[n]; -+ lnc_free(zbr); -+ -+ err = ubifs_add_dirt(c, zbr->lnum, zbr->len); -+ if (err) { -+ dbg_dump_znode(c, znode); -+ return err; -+ } -+ -+ /* We do not "gap" zbranch slots */ -+ for (i = n; i < znode->child_cnt - 1; i++) -+ znode->zbranch[i] = znode->zbranch[i + 1]; -+ znode->child_cnt -= 1; -+ -+ if (znode->child_cnt > 0) -+ return 0; -+ -+ /* -+ * This was the last zbranch, we have to delete this znode from the -+ * parent. 
-+ */ -+ -+ do { -+ ubifs_assert(!test_bit(OBSOLETE_ZNODE, &znode->flags)); -+ ubifs_assert(ubifs_zn_dirty(znode)); -+ -+ zp = znode->parent; -+ n = znode->iip; -+ -+ atomic_long_dec(&c->dirty_zn_cnt); -+ -+ err = insert_old_idx_znode(c, znode); -+ if (err) -+ return err; -+ -+ if (znode->cnext) { -+ __set_bit(OBSOLETE_ZNODE, &znode->flags); -+ atomic_long_inc(&c->clean_zn_cnt); -+ atomic_long_inc(&ubifs_clean_zn_cnt); -+ } else -+ kfree(znode); -+ znode = zp; -+ } while (znode->child_cnt == 1); /* while removing last child */ -+ -+ /* Remove entry n (the slot of the last removed child) from znode */ -+ znode->child_cnt -= 1; -+ ubifs_assert(znode->level != 0); -+ for (i = n; i < znode->child_cnt; i++) { -+ znode->zbranch[i] = znode->zbranch[i + 1]; -+ if (znode->zbranch[i].znode) -+ znode->zbranch[i].znode->iip = i; -+ } -+ -+ /* -+ * If this is the root and it has only 1 child then -+ * collapse the tree. -+ */ -+ if (!znode->parent) { -+ while (znode->child_cnt == 1 && znode->level != 0) { -+ zp = znode; -+ zbr = &znode->zbranch[0]; -+ znode = get_znode(c, znode, 0); -+ if (IS_ERR(znode)) -+ return PTR_ERR(znode); -+ znode = dirty_cow_znode(c, zbr); -+ if (IS_ERR(znode)) -+ return PTR_ERR(znode); -+ znode->parent = NULL; -+ znode->iip = 0; -+ if (c->zroot.len) { -+ err = insert_old_idx(c, c->zroot.lnum, -+ c->zroot.offs); -+ if (err) -+ return err; -+ } -+ c->zroot.lnum = zbr->lnum; -+ c->zroot.offs = zbr->offs; -+ c->zroot.len = zbr->len; -+ c->zroot.znode = znode; -+ ubifs_assert(!test_bit(OBSOLETE_ZNODE, -+ &zp->flags)); -+ ubifs_assert(test_bit(DIRTY_ZNODE, &zp->flags)); -+ atomic_long_dec(&c->dirty_zn_cnt); -+ -+ if (zp->cnext) { -+ __set_bit(OBSOLETE_ZNODE, &zp->flags); -+ atomic_long_inc(&c->clean_zn_cnt); -+ atomic_long_inc(&ubifs_clean_zn_cnt); -+ } else -+ kfree(zp); -+ } -+ } -+ -+ return 0; -+} -+ -+/** -+ * ubifs_tnc_remove - remove an index entry of a node. 
-+ * @c: UBIFS file-system description object -+ * @key: key of node -+ * -+ * Returns %0 on success or negative error code on failure. It is not an error -+ * if @key is not found in TNC, in which case nothing is removed. -+ */ -+int ubifs_tnc_remove(struct ubifs_info *c, const union ubifs_key *key) -+{ -+ int found, n, err = 0; -+ struct ubifs_znode *znode; -+ -+ mutex_lock(&c->tnc_mutex); -+ dbg_tnc("key %s", DBGKEY(key)); -+ found = lookup_level0_dirty(c, key, &znode, &n); -+ if (found < 0) { -+ err = found; -+ goto out_unlock; -+ } -+ if (found == 1) -+ err = tnc_delete(c, znode, n); -+ if (!err) -+ err = dbg_check_tnc(c, 0); -+ -+out_unlock: -+ mutex_unlock(&c->tnc_mutex); -+ return err; -+} -+ -+/** -+ * ubifs_tnc_remove_nm - remove an index entry for a "hashed" node. -+ * @c: UBIFS file-system description object -+ * @key: key of node -+ * @nm: directory entry name -+ * -+ * Returns %0 on success or negative error code on failure. If no entry with -+ * key @key and name @nm is found, nothing is removed and this is not treated -+ * as an error. -+ */ -+int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key, -+ const struct qstr *nm) -+{ -+ int n, err; -+ struct ubifs_znode *znode; -+ -+ mutex_lock(&c->tnc_mutex); -+ dbg_tnc("%.*s, key %s", nm->len, nm->name, DBGKEY(key)); -+ err = lookup_level0_dirty(c, key, &znode, &n); -+ if (err < 0) -+ goto out_unlock; -+ -+ if (err) { -+ if (c->replaying) -+ err = fallible_resolve_collision(c, key, &znode, &n, -+ nm, 0); -+ else -+ err = resolve_collision(c, key, &znode, &n, nm); -+ dbg_tnc("rc returned %d, znode %p, n %d", err, znode, n); -+ if (err < 0) -+ goto out_unlock; -+ if (err) { -+ /* Ensure the znode is dirtied */ -+ if (znode->cnext || !ubifs_zn_dirty(znode)) { -+ znode = dirty_cow_bottom_up(c, znode); -+ if (IS_ERR(znode)) { -+ err = PTR_ERR(znode); -+ goto out_unlock; -+ } -+ } -+ err = tnc_delete(c, znode, n); -+ } -+ } -+ -+out_unlock: -+ if (!err) -+ err = dbg_check_tnc(c, 0); -+ mutex_unlock(&c->tnc_mutex); -+ return err; -+} -+ -+/** -+ * key_in_range - determine if a key falls within a range of keys. 
-+ * @c: UBIFS file-system description object -+ * @key: key to check -+ * @from_key: lowest key in range -+ * @to_key: highest key in range -+ * -+ * This function returns %1 if the key is in range and %0 otherwise. -+ */ -+static int key_in_range(struct ubifs_info *c, union ubifs_key *key, -+ union ubifs_key *from_key, union ubifs_key *to_key) -+{ -+ if (keys_cmp(c, key, from_key) < 0) -+ return 0; -+ if (keys_cmp(c, key, to_key) > 0) -+ return 0; -+ return 1; -+} -+ -+/** -+ * ubifs_tnc_remove_range - remove index entries in range. -+ * @c: UBIFS file-system description object -+ * @from_key: lowest key to remove -+ * @to_key: highest key to remove -+ * -+ * This function removes index entries starting at @from_key and ending at -+ * @to_key. This function returns zero in case of success and a negative error -+ * code in case of failure. -+ */ -+int ubifs_tnc_remove_range(struct ubifs_info *c, union ubifs_key *from_key, -+ union ubifs_key *to_key) -+{ -+ int i, n, k, err = 0; -+ struct ubifs_znode *znode; -+ union ubifs_key *key; -+ -+ mutex_lock(&c->tnc_mutex); -+ while (1) { -+ /* Find first level 0 znode that contains keys to remove */ -+ err = ubifs_lookup_level0(c, from_key, &znode, &n); -+ if (err < 0) -+ goto out_unlock; -+ -+ if (err) -+ key = from_key; -+ else { -+ err = tnc_next(c, &znode, &n); -+ if (err == -ENOENT) { -+ err = 0; -+ goto out_unlock; -+ } -+ if (err < 0) -+ goto out_unlock; -+ key = &znode->zbranch[n].key; -+ if (!key_in_range(c, key, from_key, to_key)) { -+ err = 0; -+ goto out_unlock; -+ } -+ } -+ -+ /* Ensure the znode is dirtied */ -+ if (znode->cnext || !ubifs_zn_dirty(znode)) { -+ znode = dirty_cow_bottom_up(c, znode); -+ if (IS_ERR(znode)) { -+ err = PTR_ERR(znode); -+ goto out_unlock; -+ } -+ } -+ -+ /* Remove all keys in range except the first */ -+ for (i = n + 1, k = 0; i < znode->child_cnt; i++, k++) { -+ key = &znode->zbranch[i].key; -+ if (!key_in_range(c, key, from_key, to_key)) -+ break; -+ lnc_free(&znode->zbranch[i]); 
-+ err = ubifs_add_dirt(c, znode->zbranch[i].lnum, -+ znode->zbranch[i].len); -+ if (err) { -+ dbg_dump_znode(c, znode); -+ goto out_unlock; -+ } -+ dbg_tnc("removing %s", DBGKEY(key)); -+ } -+ if (k) { -+ for (i = n + 1 + k; i < znode->child_cnt; i++) -+ znode->zbranch[i - k] = znode->zbranch[i]; -+ znode->child_cnt -= k; -+ } -+ -+ /* Now delete the first */ -+ err = tnc_delete(c, znode, n); -+ if (err) -+ goto out_unlock; -+ } -+ -+out_unlock: -+ if (!err) -+ err = dbg_check_tnc(c, 0); -+ mutex_unlock(&c->tnc_mutex); -+ return err; -+} -+ -+/** -+ * ubifs_tnc_remove_ino - remove an inode from TNC. -+ * @c: UBIFS file-system description object -+ * @inum: inode number to remove -+ * -+ * This function removes inode @inum and all the extended attributes associated -+ * with the inode from TNC and returns zero in case of success or a negative -+ * error code in case of failure. The previously read extended attribute entry -+ * is freed on every exit path, including the error ones. -+ */ -+int ubifs_tnc_remove_ino(struct ubifs_info *c, ino_t inum) -+{ -+ union ubifs_key key1, key2; -+ struct ubifs_dent_node *xent, *pxent = NULL; -+ struct qstr nm = { .name = NULL }; -+ -+ dbg_tnc("ino %lu", (unsigned long)inum); -+ -+ /* -+ * Walk all extended attribute entries and remove them together with -+ * corresponding extended attribute inodes. 
-+ */ -+ lowest_xent_key(c, &key1, inum); -+ while (1) { -+ ino_t xattr_inum; -+ int err; -+ -+ xent = ubifs_tnc_next_ent(c, &key1, &nm); -+ if (IS_ERR(xent)) { -+ err = PTR_ERR(xent); -+ if (err == -ENOENT) -+ break; -+ /* @pxent still holds the previous entry, do not leak it */ -+ kfree(pxent); -+ return err; -+ } -+ -+ xattr_inum = le64_to_cpu(xent->inum); -+ dbg_tnc("xent '%s', ino %lu", xent->name, -+ (unsigned long)xattr_inum); -+ -+ nm.name = xent->name; -+ nm.len = le16_to_cpu(xent->nlen); -+ err = ubifs_tnc_remove_nm(c, &key1, &nm); -+ if (err) { -+ kfree(pxent); -+ kfree(xent); -+ return err; -+ } -+ -+ lowest_ino_key(c, &key1, xattr_inum); -+ highest_ino_key(c, &key2, xattr_inum); -+ err = ubifs_tnc_remove_range(c, &key1, &key2); -+ if (err) { -+ kfree(pxent); -+ kfree(xent); -+ return err; -+ } -+ -+ /* -+ * @nm.name points into @xent, so @xent has to stay allocated until -+ * the next entry has been looked up; only the older one is freed. -+ */ -+ kfree(pxent); -+ pxent = xent; -+ key_read(c, &xent->key, &key1); -+ } -+ -+ kfree(pxent); -+ lowest_ino_key(c, &key1, inum); -+ highest_ino_key(c, &key2, inum); -+ -+ return ubifs_tnc_remove_range(c, &key1, &key2); -+} -+ -+/** -+ * ubifs_tnc_next_ent - walk directory or extended attribute entries. -+ * @c: UBIFS file-system description object -+ * @key: key of last entry -+ * @nm: name of last entry found or %NULL -+ * -+ * This function finds and reads the next directory or extended attribute entry -+ * after the given key (@key) if there is one. @nm is used to resolve -+ * collisions. -+ * -+ * If the name of the current entry is not known and only the key is known, -+ * @nm->name has to be %NULL. In this case the semantics of this function is a -+ * little bit different and it returns the entry corresponding to this key, not -+ * the next one. If the key was not found, the closest "right" entry is -+ * returned. -+ * -+ * If the first entry has to be found, @key has to contain the lowest possible -+ * key value for this inode and @nm->name has to be %NULL. -+ * -+ * This function returns the found directory or extended attribute entry node -+ * in case of success, %-ENOENT is returned if no entry was found, and a -+ * negative error code is returned in case of failure. 
-+ */ -+struct ubifs_dent_node *ubifs_tnc_next_ent(struct ubifs_info *c, -+ union ubifs_key *key, -+ const struct qstr *nm) -+{ -+ int n, err, type = key_type(c, key); -+ struct ubifs_znode *znode; -+ struct ubifs_dent_node *dent; -+ struct ubifs_zbranch *zbr; -+ union ubifs_key *dkey; -+ -+ dbg_tnc("%s %s", nm->name ? (char *)nm->name : "(lowest)", DBGKEY(key)); -+ ubifs_assert(is_hash_key(c, key)); -+ -+ mutex_lock(&c->tnc_mutex); -+ err = ubifs_lookup_level0(c, key, &znode, &n); -+ if (unlikely(err < 0)) -+ goto out_unlock; -+ -+ if (nm->name) { -+ if (err) { -+ /* Handle collisions */ -+ err = resolve_collision(c, key, &znode, &n, nm); -+ dbg_tnc("rc returned %d, znode %p, n %d", -+ err, znode, n); -+ if (unlikely(err < 0)) -+ goto out_unlock; -+ } -+ -+ /* Now find next entry */ -+ err = tnc_next(c, &znode, &n); -+ if (unlikely(err)) -+ goto out_unlock; -+ } else { -+ /* -+ * The full name of the entry was not given, in which case the -+ * behavior of this function is a little different and it -+ * returns current entry, not the next one. -+ */ -+ if (!err) { -+ /* -+ * However, the given key does not exist in the TNC -+ * tree and @znode/@n variables contain the closest -+ * "preceding" element. Switch to the next one. -+ */ -+ err = tnc_next(c, &znode, &n); -+ if (err) -+ goto out_unlock; -+ } -+ } -+ -+ zbr = &znode->zbranch[n]; -+ dent = kmalloc(zbr->len, GFP_NOFS); -+ if (unlikely(!dent)) { -+ err = -ENOMEM; -+ goto out_unlock; -+ } -+ -+ /* -+ * The above 'tnc_next()' call could lead us to the next inode, check -+ * this. 
-+ */ -+ dkey = &zbr->key; -+ if (key_inum(c, dkey) != key_inum(c, key) || -+ key_type(c, dkey) != type) { -+ err = -ENOENT; -+ goto out_free; -+ } -+ -+ err = tnc_read_node_nm(c, zbr, dent); -+ if (unlikely(err)) -+ goto out_free; -+ -+ mutex_unlock(&c->tnc_mutex); -+ return dent; -+ -+out_free: -+ kfree(dent); -+out_unlock: -+ mutex_unlock(&c->tnc_mutex); -+ return ERR_PTR(err); -+} -+ -+/** -+ * tnc_destroy_cnext - destroy left-over obsolete znodes from a failed commit. -+ * @c: UBIFS file-system description object -+ * -+ * Destroy left-over obsolete znodes from a failed commit. The @c->cnext list is -+ * walked until it ends or wraps around to its first element, and every znode -+ * which has the %OBSOLETE_ZNODE flag set is freed. Nothing is done if -+ * @c->cnext is %NULL. -+ */ -+static void tnc_destroy_cnext(struct ubifs_info *c) -+{ -+ struct ubifs_znode *cnext; -+ -+ if (!c->cnext) -+ return; -+ ubifs_assert(c->cmt_state == COMMIT_BROKEN); -+ cnext = c->cnext; -+ do { -+ struct ubifs_znode *znode = cnext; -+ -+ cnext = cnext->cnext; -+ if (test_bit(OBSOLETE_ZNODE, &znode->flags)) -+ kfree(znode); -+ } while (cnext && cnext != c->cnext); -+} -+ -+/** -+ * ubifs_tnc_close - close TNC subsystem and free all related resources. -+ * @c: UBIFS file-system description object -+ */ -+void ubifs_tnc_close(struct ubifs_info *c) -+{ -+ long clean_freed; -+ -+ tnc_destroy_cnext(c); -+ if (c->zroot.znode) { -+ clean_freed = ubifs_destroy_tnc_subtree(c->zroot.znode); -+ atomic_long_sub(clean_freed, &ubifs_clean_zn_cnt); -+ } -+ kfree(c->gap_lebs); -+ kfree(c->ilebs); -+ destroy_old_idx(c); -+} -+ -+/** -+ * left_znode - get the znode to the left. -+ * @c: UBIFS file-system description object -+ * @znode: znode -+ * -+ * This function returns a pointer to the znode to the left of @znode or NULL if -+ * there is not one. A negative error code is returned on failure. 
-+ */ -+static struct ubifs_znode *left_znode(struct ubifs_info *c, -+ struct ubifs_znode *znode) -+{ -+ int level = znode->level; -+ -+ while (1) { -+ int n = znode->iip - 1; -+ -+ /* Go up until we can go left */ -+ znode = znode->parent; -+ if (!znode) -+ return NULL; -+ if (n >= 0) { -+ /* Now go down the rightmost branch to 'level' */ -+ znode = get_znode(c, znode, n); -+ if (IS_ERR(znode)) -+ return znode; -+ while (znode->level != level) { -+ n = znode->child_cnt - 1; -+ znode = get_znode(c, znode, n); -+ if (IS_ERR(znode)) -+ return znode; -+ } -+ break; -+ } -+ } -+ return znode; -+} -+ -+/** -+ * right_znode - get the znode to the right. -+ * @c: UBIFS file-system description object -+ * @znode: znode -+ * -+ * This function returns a pointer to the znode to the right of @znode or NULL -+ * if there is not one. A negative error code (in form of an error pointer, as -+ * returned by 'get_znode()') is returned on failure. -+ */ -+static struct ubifs_znode *right_znode(struct ubifs_info *c, -+ struct ubifs_znode *znode) -+{ -+ int level = znode->level; -+ -+ while (1) { -+ int n = znode->iip + 1; -+ -+ /* Go up until we can go right */ -+ znode = znode->parent; -+ if (!znode) -+ return NULL; -+ if (n < znode->child_cnt) { -+ /* Now go down the leftmost branch to 'level' */ -+ znode = get_znode(c, znode, n); -+ if (IS_ERR(znode)) -+ return znode; -+ while (znode->level != level) { -+ znode = get_znode(c, znode, 0); -+ if (IS_ERR(znode)) -+ return znode; -+ } -+ break; -+ } -+ } -+ return znode; -+} -+ -+/** -+ * lookup_znode - find a particular indexing node from TNC. -+ * @c: UBIFS file-system description object -+ * @key: index node key to lookup -+ * @level: index node level -+ * @lnum: index node LEB number -+ * @offs: index node offset -+ * -+ * This function searches an indexing node by its first key @key and its -+ * address @lnum:@offs. It looks up the indexing tree by pulling all indexing -+ * nodes it traverses to TNC. 
This function is called for indexing nodes which -+ * were found on the media by scanning, for example when garbage-collecting or -+ * when doing in-the-gaps commit. This means that the indexing node which is -+ * looked for does not have to have exactly the same leftmost key @key, because -+ * the leftmost key may have been changed, in which case TNC will contain a -+ * dirty znode which still refers the same @lnum:@offs. This function is clever -+ * enough to recognize such indexing nodes. -+ * -+ * Note, if a znode was deleted or changed too much, then this function will -+ * not find it. For situations like this UBIFS has the old index RB-tree -+ * (indexed by @lnum:@offs). -+ * -+ * This function returns a pointer to the znode found or %NULL if it is not -+ * found. A negative error code is returned on failure. -+ */ -+static struct ubifs_znode *lookup_znode(struct ubifs_info *c, -+ union ubifs_key *key, int level, -+ int lnum, int offs) -+{ -+ struct ubifs_znode *znode, *zn; -+ int n, nn; -+ -+ /* -+ * The arguments have probably been read off flash, so don't assume -+ * they are valid. -+ */ -+ if (level < 0) -+ return ERR_PTR(-EINVAL); -+ -+ /* Get the root znode */ -+ znode = c->zroot.znode; -+ if (!znode) { -+ znode = ubifs_load_znode(c, &c->zroot, NULL, 0); -+ if (IS_ERR(znode)) -+ return znode; -+ } -+ /* Check if it is the one we are looking for */ -+ if (c->zroot.lnum == lnum && c->zroot.offs == offs) -+ return znode; -+ /* Descend to the parent level i.e. (level + 1) */ -+ if (level >= znode->level) -+ return NULL; -+ while (1) { -+ ubifs_search_zbranch(c, znode, key, &n); -+ if (n < 0) { -+ /* -+ * We reached a znode where the leftmost key is greater -+ * than the key we are searching for. This is the same -+ * situation as the one described in a huge comment at -+ * the end of the 'ubifs_lookup_level0()' function. And -+ * for exactly the same reasons we have to try to look -+ * left before giving up. 
-+ */ -+ znode = left_znode(c, znode); -+ if (!znode) -+ return NULL; -+ if (IS_ERR(znode)) -+ return znode; -+ ubifs_search_zbranch(c, znode, key, &n); -+ ubifs_assert(n >= 0); -+ } -+ if (znode->level == level + 1) -+ break; -+ znode = get_znode(c, znode, n); -+ if (IS_ERR(znode)) -+ return znode; -+ } -+ /* Check if the child is the one we are looking for */ -+ if (znode->zbranch[n].lnum == lnum && znode->zbranch[n].offs == offs) -+ return get_znode(c, znode, n); -+ /* If the key is unique, there is nowhere else to look */ -+ if (!is_hash_key(c, key)) -+ return NULL; -+ /* -+ * The key is not unique and so may be also in the znodes to either -+ * side. -+ */ -+ zn = znode; -+ nn = n; -+ /* Look left */ -+ while (1) { -+ /* Move one branch to the left */ -+ if (n) -+ n -= 1; -+ else { -+ znode = left_znode(c, znode); -+ if (!znode) -+ break; -+ if (IS_ERR(znode)) -+ return znode; -+ n = znode->child_cnt - 1; -+ } -+ /* Check it */ -+ if (znode->zbranch[n].lnum == lnum && -+ znode->zbranch[n].offs == offs) -+ return get_znode(c, znode, n); -+ /* Stop if the key is less than the one we are looking for */ -+ if (keys_cmp(c, &znode->zbranch[n].key, key) < 0) -+ break; -+ } -+ /* Back to the middle */ -+ znode = zn; -+ n = nn; -+ /* Look right */ -+ while (1) { -+ /* Move one branch to the right */ -+ if (++n >= znode->child_cnt) { -+ znode = right_znode(c, znode); -+ if (!znode) -+ break; -+ if (IS_ERR(znode)) -+ return znode; -+ n = 0; -+ } -+ /* Check it */ -+ if (znode->zbranch[n].lnum == lnum && -+ znode->zbranch[n].offs == offs) -+ return get_znode(c, znode, n); -+ /* Stop if the key is greater than the one we are looking for */ -+ if (keys_cmp(c, &znode->zbranch[n].key, key) > 0) -+ break; -+ } -+ return NULL; -+} -+ -+/** -+ * is_idx_node_in_tnc - determine if an index node is in the TNC. 
-+ * @c: UBIFS file-system description object -+ * @key: key of index node -+ * @level: index node level -+ * @lnum: LEB number of index node -+ * @offs: offset of index node -+ * -+ * This function returns %0 if the index node is not referred to in the TNC, %1 -+ * if the index node is referred to in the TNC and the corresponding znode is -+ * dirty, %2 if an index node is referred to in the TNC and the corresponding -+ * znode is clean, and a negative error code in case of failure. -+ * -+ * Note, the @key argument has to be the key of the first child. Also note, -+ * this function relies on the fact that 0:0 is never a valid LEB number and -+ * offset for a main-area node. -+ */ -+int is_idx_node_in_tnc(struct ubifs_info *c, union ubifs_key *key, int level, -+ int lnum, int offs) -+{ -+ struct ubifs_znode *znode; -+ -+ znode = lookup_znode(c, key, level, lnum, offs); -+ if (!znode) -+ return 0; -+ if (IS_ERR(znode)) -+ return PTR_ERR(znode); -+ -+ return ubifs_zn_dirty(znode) ? 1 : 2; -+} -+ -+/** -+ * is_leaf_node_in_tnc - determine if a non-indexing node is in the TNC. -+ * @c: UBIFS file-system description object -+ * @key: node key -+ * @lnum: node LEB number -+ * @offs: node offset -+ * -+ * This function returns %1 if the node is referred to in the TNC, %0 if it is -+ * not, and a negative error code in case of failure. -+ * -+ * Note, this function relies on the fact that 0:0 is never a valid LEB number -+ * and offset for a main-area node. 
-+ */ -+static int is_leaf_node_in_tnc(struct ubifs_info *c, union ubifs_key *key, -+ int lnum, int offs) -+{ -+ struct ubifs_zbranch *zbr; -+ struct ubifs_znode *znode, *zn; -+ int n, found, err, nn; -+ const int unique = !is_hash_key(c, key); -+ -+ found = ubifs_lookup_level0(c, key, &znode, &n); -+ if (found < 0) -+ return found; /* Error code */ -+ if (!found) -+ return 0; -+ zbr = &znode->zbranch[n]; -+ if (lnum == zbr->lnum && offs == zbr->offs) -+ return 1; /* Found it */ -+ if (unique) -+ return 0; -+ /* -+ * Because the key is not unique, we have to look left -+ * and right as well -+ */ -+ zn = znode; -+ nn = n; -+ /* Look left for as long as the branch keys are equal to @key */ -+ while (1) { -+ err = tnc_prev(c, &znode, &n); -+ if (err == -ENOENT) -+ break; -+ if (err) -+ return err; -+ if (keys_cmp(c, key, &znode->zbranch[n].key)) -+ break; -+ zbr = &znode->zbranch[n]; -+ if (lnum == zbr->lnum && offs == zbr->offs) -+ return 1; /* Found it */ -+ } -+ /* Look right, starting again from the branch found by the lookup */ -+ znode = zn; -+ n = nn; -+ while (1) { -+ err = tnc_next(c, &znode, &n); -+ if (err) { -+ if (err == -ENOENT) -+ return 0; -+ return err; -+ } -+ if (keys_cmp(c, key, &znode->zbranch[n].key)) -+ break; -+ zbr = &znode->zbranch[n]; -+ if (lnum == zbr->lnum && offs == zbr->offs) -+ return 1; /* Found it */ -+ } -+ return 0; -+} -+ -+/** -+ * ubifs_tnc_has_node - determine whether a node is in the TNC. -+ * @c: UBIFS file-system description object -+ * @key: node key -+ * @level: index node level (if it is an index node) -+ * @lnum: node LEB number -+ * @offs: node offset -+ * @is_idx: non-zero if the node is an index node -+ * -+ * This function returns %1 if the node is in the TNC, %0 if it is not, and a -+ * negative error code in case of failure. For index nodes, @key has to be the -+ * key of the first child. An index node is considered to be in the TNC only if -+ * the corresponding znode is clean or has not been loaded. 
-+ */ -+int ubifs_tnc_has_node(struct ubifs_info *c, union ubifs_key *key, int level, -+ int lnum, int offs, int is_idx) -+{ -+ int err; -+ -+ mutex_lock(&c->tnc_mutex); -+ if (is_idx) { -+ err = is_idx_node_in_tnc(c, key, level, lnum, offs); -+ if (err < 0) -+ goto out_unlock; -+ if (err == 1) -+ /* The index node was found but it was dirty */ -+ err = 0; -+ else if (err == 2) -+ /* The index node was found and it was clean */ -+ err = 1; -+ else -+ BUG_ON(err != 0); -+ } else -+ err = is_leaf_node_in_tnc(c, key, lnum, offs); -+ -+out_unlock: -+ mutex_unlock(&c->tnc_mutex); -+ return err; -+} -+ -+/** -+ * ubifs_dirty_idx_node - dirty an index node. -+ * @c: UBIFS file-system description object -+ * @key: index node key -+ * @level: index node level -+ * @lnum: index node LEB number -+ * @offs: index node offset -+ * -+ * This function loads and dirties an index node so that it can be garbage -+ * collected. The @key argument has to be the key of the first child. This -+ * function relies on the fact that 0:0 is never a valid LEB number and offset -+ * for a main-area node. Returns %0 on success and a negative error code on -+ * failure. If the index node is not found in the TNC, nothing is dirtied and -+ * %0 is returned. -+ */ -+int ubifs_dirty_idx_node(struct ubifs_info *c, union ubifs_key *key, int level, -+ int lnum, int offs) -+{ -+ struct ubifs_znode *znode; -+ int err = 0; -+ -+ mutex_lock(&c->tnc_mutex); -+ znode = lookup_znode(c, key, level, lnum, offs); -+ if (!znode) -+ goto out_unlock; -+ if (IS_ERR(znode)) { -+ err = PTR_ERR(znode); -+ goto out_unlock; -+ } -+ znode = dirty_cow_bottom_up(c, znode); -+ if (IS_ERR(znode)) { -+ err = PTR_ERR(znode); -+ goto out_unlock; -+ } -+ -+out_unlock: -+ mutex_unlock(&c->tnc_mutex); -+ return err; -+} -diff -Nurd linux-2.6.24.orig/fs/ubifs/tnc_commit.c linux-2.6.24/fs/ubifs/tnc_commit.c ---- linux-2.6.24.orig/fs/ubifs/tnc_commit.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.24/fs/ubifs/tnc_commit.c 2009-04-17 09:49:28.000000000 +0200 -@@ -0,0 +1,1105 @@ -+/* -+ * This file is part of UBIFS. 
-+ * -+ * Copyright (C) 2006-2008 Nokia Corporation. -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 as published by -+ * the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -+ * more details. -+ * -+ * You should have received a copy of the GNU General Public License along with -+ * this program; if not, write to the Free Software Foundation, Inc., 51 -+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ * -+ * Authors: Adrian Hunter -+ * Artem Bityutskiy (Битюцкий Артём) -+ */ -+ -+/* This file implements TNC functions for committing */ -+ -+#include "ubifs.h" -+ -+/** -+ * make_idx_node - make an index node for fill-the-gaps method of TNC commit. -+ * @c: UBIFS file-system description object -+ * @idx: buffer in which to place new index node -+ * @znode: znode from which to make new index node -+ * @lnum: LEB number where new index node will be written -+ * @offs: offset where new index node will be written -+ * @len: length of new index node -+ * -+ * The new position @lnum:@offs is also recorded in the parent zbranch, or in -+ * @c->zroot if @znode has no parent. This function returns the value returned -+ * by 'insert_old_idx_znode()'. -+ */ -+static int make_idx_node(struct ubifs_info *c, struct ubifs_idx_node *idx, -+ struct ubifs_znode *znode, int lnum, int offs, int len) -+{ -+ struct ubifs_znode *zp; -+ int i, err; -+ -+ /* Make index node */ -+ idx->ch.node_type = UBIFS_IDX_NODE; -+ idx->child_cnt = cpu_to_le16(znode->child_cnt); -+ idx->level = cpu_to_le16(znode->level); -+ for (i = 0; i < znode->child_cnt; i++) { -+ struct ubifs_branch *br = ubifs_idx_branch(c, idx, i); -+ struct ubifs_zbranch *zbr = &znode->zbranch[i]; -+ -+ key_write_idx(c, &zbr->key, &br->key); -+ br->lnum = cpu_to_le32(zbr->lnum); -+ br->offs = cpu_to_le32(zbr->offs); -+ br->len = cpu_to_le32(zbr->len); -+ if (!zbr->lnum || !zbr->len) { -+ ubifs_err("bad ref in 
znode"); -+ dbg_dump_znode(c, znode); -+ if (zbr->znode) -+ dbg_dump_znode(c, zbr->znode); -+ } -+ } -+ ubifs_prepare_node(c, idx, len, 0); -+ -+#ifdef CONFIG_UBIFS_FS_DEBUG -+ znode->lnum = lnum; -+ znode->offs = offs; -+ znode->len = len; -+#endif -+ -+ err = insert_old_idx_znode(c, znode); -+ -+ /* Update the parent */ -+ zp = znode->parent; -+ if (zp) { -+ struct ubifs_zbranch *zbr; -+ -+ zbr = &zp->zbranch[znode->iip]; -+ zbr->lnum = lnum; -+ zbr->offs = offs; -+ zbr->len = len; -+ } else { -+ c->zroot.lnum = lnum; -+ c->zroot.offs = offs; -+ c->zroot.len = len; -+ } -+ c->calc_idx_sz += ALIGN(len, 8); -+ -+ atomic_long_dec(&c->dirty_zn_cnt); -+ -+ ubifs_assert(ubifs_zn_dirty(znode)); -+ ubifs_assert(test_bit(COW_ZNODE, &znode->flags)); -+ -+ __clear_bit(DIRTY_ZNODE, &znode->flags); -+ __clear_bit(COW_ZNODE, &znode->flags); -+ -+ return err; -+} -+ -+/** -+ * fill_gap - make index nodes in gaps in dirty index LEBs. -+ * @c: UBIFS file-system description object -+ * @lnum: LEB number that gap appears in -+ * @gap_start: offset of start of gap -+ * @gap_end: offset of end of gap -+ * @dirt: adds dirty space to this -+ * -+ * This function returns the number of index nodes written into the gap, or the -+ * error code returned by 'make_idx_node()' if it fails. 
-+ */ -+static int fill_gap(struct ubifs_info *c, int lnum, int gap_start, int gap_end, -+ int *dirt) -+{ -+ int len, gap_remains, gap_pos, written, pad_len; -+ -+ ubifs_assert((gap_start & 7) == 0); -+ ubifs_assert((gap_end & 7) == 0); -+ ubifs_assert(gap_end >= gap_start); -+ -+ gap_remains = gap_end - gap_start; -+ if (!gap_remains) -+ return 0; -+ gap_pos = gap_start; -+ written = 0; -+ while (c->enext) { -+ len = ubifs_idx_node_sz(c, c->enext->child_cnt); -+ if (len < gap_remains) { -+ struct ubifs_znode *znode = c->enext; -+ const int alen = ALIGN(len, 8); -+ int err; -+ -+ ubifs_assert(alen <= gap_remains); -+ err = make_idx_node(c, c->ileb_buf + gap_pos, znode, -+ lnum, gap_pos, len); -+ if (err) -+ return err; -+ gap_remains -= alen; -+ gap_pos += alen; -+ c->enext = znode->cnext; -+ if (c->enext == c->cnext) -+ c->enext = NULL; -+ written += 1; -+ } else -+ break; -+ } -+ if (gap_end == c->leb_size) { -+ c->ileb_len = ALIGN(gap_pos, c->min_io_size); -+ /* Pad to end of min_io_size */ -+ pad_len = c->ileb_len - gap_pos; -+ } else -+ /* Pad to end of gap */ -+ pad_len = gap_remains; -+ dbg_gc("LEB %d:%d to %d len %d nodes written %d wasted bytes %d", -+ lnum, gap_start, gap_end, gap_end - gap_start, written, pad_len); -+ ubifs_pad(c, c->ileb_buf + gap_pos, pad_len); -+ *dirt += pad_len; -+ return written; -+} -+ -+/** -+ * find_old_idx - find an index node obsoleted since the last commit start. -+ * @c: UBIFS file-system description object -+ * @lnum: LEB number of obsoleted index node -+ * @offs: offset of obsoleted index node -+ * -+ * Returns %1 if found and %0 otherwise. 
-+ */ -+static int find_old_idx(struct ubifs_info *c, int lnum, int offs) -+{ -+ struct ubifs_old_idx *o; -+ struct rb_node *p; -+ -+ p = c->old_idx.rb_node; -+ while (p) { -+ o = rb_entry(p, struct ubifs_old_idx, rb); -+ if (lnum < o->lnum) -+ p = p->rb_left; -+ else if (lnum > o->lnum) -+ p = p->rb_right; -+ else if (offs < o->offs) -+ p = p->rb_left; -+ else if (offs > o->offs) -+ p = p->rb_right; -+ else -+ return 1; -+ } -+ return 0; -+} -+ -+/** -+ * is_idx_node_in_use - determine if an index node can be overwritten. -+ * @c: UBIFS file-system description object -+ * @key: key of index node -+ * @level: index node level -+ * @lnum: LEB number of index node -+ * @offs: offset of index node -+ * -+ * If @key / @lnum / @offs identify an index node that was not part of the old -+ * index, then this function returns %0 (obsolete). Else if the index node was -+ * part of the old index but is now dirty %1 is returned, else if it is clean %2 -+ * is returned. A negative error code is returned on failure. -+ */ -+static int is_idx_node_in_use(struct ubifs_info *c, union ubifs_key *key, -+ int level, int lnum, int offs) -+{ -+ int ret; -+ -+ ret = is_idx_node_in_tnc(c, key, level, lnum, offs); -+ if (ret < 0) -+ return ret; /* Error code */ -+ if (ret == 0) -+ if (find_old_idx(c, lnum, offs)) -+ return 1; -+ return ret; -+} -+ -+/** -+ * layout_leb_in_gaps - layout index nodes using in-the-gaps method. -+ * @c: UBIFS file-system description object -+ * @p: return LEB number here -+ * -+ * This function lays out new index nodes for dirty znodes using in-the-gaps -+ * method of TNC commit. -+ * This function merely puts the next znode into the next gap, making no attempt -+ * to try to maximise the number of znodes that fit. -+ * This function returns the number of index nodes written into the gaps, or a -+ * negative error code on failure. 
-+ */ -+static int layout_leb_in_gaps(struct ubifs_info *c, int *p) -+{ -+ struct ubifs_scan_leb *sleb; -+ struct ubifs_scan_node *snod; -+ int lnum, dirt = 0, gap_start, gap_end, err, written, tot_written; -+ -+ tot_written = 0; -+ /* Get an index LEB with lots of obsolete index nodes */ -+ lnum = ubifs_find_dirty_idx_leb(c); -+ if (lnum < 0) -+ /* -+ * There also may be dirt in the index head that could be -+ * filled, however we do not check there at present. -+ */ -+ return lnum; /* Error code */ -+ *p = lnum; -+ dbg_gc("LEB %d", lnum); -+ /* -+ * Scan the index LEB. We use the generic scan for this even though -+ * it is more comprehensive and less efficient than is needed for this -+ * purpose. -+ */ -+ sleb = ubifs_scan(c, lnum, 0, c->ileb_buf); -+ c->ileb_len = 0; -+ if (IS_ERR(sleb)) -+ return PTR_ERR(sleb); -+ gap_start = 0; -+ list_for_each_entry(snod, &sleb->nodes, list) { -+ struct ubifs_idx_node *idx; -+ int in_use, level; -+ -+ ubifs_assert(snod->type == UBIFS_IDX_NODE); -+ idx = snod->node; -+ key_read(c, ubifs_idx_key(c, idx), &snod->key); -+ level = le16_to_cpu(idx->level); -+ /* Determine if the index node is in use (not obsolete) */ -+ in_use = is_idx_node_in_use(c, &snod->key, level, lnum, -+ snod->offs); -+ if (in_use < 0) { -+ ubifs_scan_destroy(sleb); -+ return in_use; /* Error code */ -+ } -+ if (in_use) { -+ if (in_use == 1) -+ dirt += ALIGN(snod->len, 8); -+ /* -+ * The obsolete index nodes form gaps that can be -+ * overwritten. This gap has ended because we have -+ * found an index node that is still in use -+ * i.e. 
not obsolete -+ */ -+ gap_end = snod->offs; -+ /* Try to fill gap */ -+ written = fill_gap(c, lnum, gap_start, gap_end, &dirt); -+ if (written < 0) { -+ ubifs_scan_destroy(sleb); -+ return written; /* Error code */ -+ } -+ tot_written += written; -+ gap_start = ALIGN(snod->offs + snod->len, 8); -+ } -+ } -+ ubifs_scan_destroy(sleb); -+ c->ileb_len = c->leb_size; -+ gap_end = c->leb_size; -+ /* Try to fill gap */ -+ written = fill_gap(c, lnum, gap_start, gap_end, &dirt); -+ if (written < 0) -+ return written; /* Error code */ -+ tot_written += written; -+ if (tot_written == 0) { -+ struct ubifs_lprops lp; -+ -+ dbg_gc("LEB %d wrote %d index nodes", lnum, tot_written); -+ err = ubifs_read_one_lp(c, lnum, &lp); -+ if (err) -+ return err; -+ if (lp.free == c->leb_size) { -+ /* -+ * We must have snatched this LEB from the idx_gc list -+ * so we need to correct the free and dirty space. -+ */ -+ err = ubifs_change_one_lp(c, lnum, -+ c->leb_size - c->ileb_len, -+ dirt, 0, 0, 0); -+ if (err) -+ return err; -+ } -+ return 0; -+ } -+ err = ubifs_change_one_lp(c, lnum, c->leb_size - c->ileb_len, dirt, -+ 0, 0, 0); -+ if (err) -+ return err; -+ err = ubifs_leb_change(c, lnum, c->ileb_buf, c->ileb_len, -+ UBI_SHORTTERM); -+ if (err) -+ return err; -+ dbg_gc("LEB %d wrote %d index nodes", lnum, tot_written); -+ return tot_written; -+} -+ -+/** -+ * get_leb_cnt - calculate the number of empty LEBs needed to commit. -+ * @c: UBIFS file-system description object -+ * @cnt: number of znodes to commit -+ * -+ * This function returns the number of empty LEBs needed to commit @cnt znodes -+ * to the current index head. The number is not exact and may be more than -+ * needed. -+ */ -+static int get_leb_cnt(struct ubifs_info *c, int cnt) -+{ -+ int d; -+ -+ /* Assume maximum index node size (i.e. 
overestimate space needed) */ -+ cnt -= (c->leb_size - c->ihead_offs) / c->max_idx_node_sz; -+ if (cnt < 0) -+ cnt = 0; -+ d = c->leb_size / c->max_idx_node_sz; -+ return DIV_ROUND_UP(cnt, d); -+} -+ -+/** -+ * layout_in_gaps - in-the-gaps method of committing TNC. -+ * @c: UBIFS file-system description object -+ * @cnt: number of dirty znodes to commit. -+ * -+ * This function lays out new index nodes for dirty znodes using in-the-gaps -+ * method of TNC commit. -+ * -+ * This function returns %0 on success and a negative error code on failure. -+ */ -+static int layout_in_gaps(struct ubifs_info *c, int cnt) -+{ -+ int err, leb_needed_cnt, written, *p; -+ -+ dbg_gc("%d znodes to write", cnt); -+ -+ c->gap_lebs = kmalloc(sizeof(int) * (c->lst.idx_lebs + 1), GFP_NOFS); -+ if (!c->gap_lebs) -+ return -ENOMEM; -+ -+ p = c->gap_lebs; -+ do { -+ ubifs_assert(p < c->gap_lebs + sizeof(int) * c->lst.idx_lebs); -+ written = layout_leb_in_gaps(c, p); -+ if (written < 0) { -+ err = written; -+ if (err != -ENOSPC) { -+ kfree(c->gap_lebs); -+ c->gap_lebs = NULL; -+ return err; -+ } -+ if (!dbg_force_in_the_gaps_enabled) { -+ /* -+ * Do not print scary warnings if the debugging -+ * option which forces in-the-gaps is enabled. -+ */ -+ ubifs_err("out of space"); -+ spin_lock(&c->space_lock); -+ dbg_dump_budg(c); -+ spin_unlock(&c->space_lock); -+ dbg_dump_lprops(c); -+ } -+ /* Try to commit anyway */ -+ err = 0; -+ break; -+ } -+ p++; -+ cnt -= written; -+ leb_needed_cnt = get_leb_cnt(c, cnt); -+ dbg_gc("%d znodes remaining, need %d LEBs, have %d", cnt, -+ leb_needed_cnt, c->ileb_cnt); -+ } while (leb_needed_cnt > c->ileb_cnt); -+ -+ *p = -1; -+ return 0; -+} -+ -+/** -+ * layout_in_empty_space - layout index nodes in empty space. -+ * @c: UBIFS file-system description object -+ * -+ * This function lays out new index nodes for dirty znodes using empty LEBs. -+ * -+ * This function returns %0 on success and a negative error code on failure. 
-+ */ -+static int layout_in_empty_space(struct ubifs_info *c) -+{ -+ struct ubifs_znode *znode, *cnext, *zp; -+ int lnum, offs, len, next_len, buf_len, buf_offs, used, avail; -+ int wlen, blen, err; -+ -+ cnext = c->enext; -+ if (!cnext) -+ return 0; -+ -+ lnum = c->ihead_lnum; -+ buf_offs = c->ihead_offs; -+ -+ buf_len = ubifs_idx_node_sz(c, c->fanout); -+ buf_len = ALIGN(buf_len, c->min_io_size); -+ used = 0; -+ avail = buf_len; -+ -+ /* Ensure there is enough room for first write */ -+ next_len = ubifs_idx_node_sz(c, cnext->child_cnt); -+ if (buf_offs + next_len > c->leb_size) -+ lnum = -1; -+ -+ while (1) { -+ znode = cnext; -+ -+ len = ubifs_idx_node_sz(c, znode->child_cnt); -+ -+ /* Determine the index node position */ -+ if (lnum == -1) { -+ if (c->ileb_nxt >= c->ileb_cnt) { -+ ubifs_err("out of space"); -+ return -ENOSPC; -+ } -+ lnum = c->ilebs[c->ileb_nxt++]; -+ buf_offs = 0; -+ used = 0; -+ avail = buf_len; -+ } -+ -+ offs = buf_offs + used; -+ -+#ifdef CONFIG_UBIFS_FS_DEBUG -+ znode->lnum = lnum; -+ znode->offs = offs; -+ znode->len = len; -+#endif -+ -+ /* Update the parent */ -+ zp = znode->parent; -+ if (zp) { -+ struct ubifs_zbranch *zbr; -+ int i; -+ -+ i = znode->iip; -+ zbr = &zp->zbranch[i]; -+ zbr->lnum = lnum; -+ zbr->offs = offs; -+ zbr->len = len; -+ } else { -+ c->zroot.lnum = lnum; -+ c->zroot.offs = offs; -+ c->zroot.len = len; -+ } -+ c->calc_idx_sz += ALIGN(len, 8); -+ -+ /* -+ * Once lprops is updated, we can decrease the dirty znode count -+ * but it is easier to just do it here. 
-+ */ -+ atomic_long_dec(&c->dirty_zn_cnt); -+ -+ /* -+ * Calculate the next index node length to see if there is -+ * enough room for it -+ */ -+ cnext = znode->cnext; -+ if (cnext == c->cnext) -+ next_len = 0; -+ else -+ next_len = ubifs_idx_node_sz(c, cnext->child_cnt); -+ -+ if (c->min_io_size == 1) { -+ buf_offs += ALIGN(len, 8); -+ if (next_len) { -+ if (buf_offs + next_len <= c->leb_size) -+ continue; -+ err = ubifs_update_one_lp(c, lnum, 0, -+ c->leb_size - buf_offs, 0, 0); -+ if (err) -+ return err; -+ lnum = -1; -+ continue; -+ } -+ err = ubifs_update_one_lp(c, lnum, -+ c->leb_size - buf_offs, 0, 0, 0); -+ if (err) -+ return err; -+ break; -+ } -+ -+ /* Update buffer positions */ -+ wlen = used + len; -+ used += ALIGN(len, 8); -+ avail -= ALIGN(len, 8); -+ -+ if (next_len != 0 && -+ buf_offs + used + next_len <= c->leb_size && -+ avail > 0) -+ continue; -+ -+ if (avail <= 0 && next_len && -+ buf_offs + used + next_len <= c->leb_size) -+ blen = buf_len; -+ else -+ blen = ALIGN(wlen, c->min_io_size); -+ -+ /* The buffer is full or there are no more znodes to do */ -+ buf_offs += blen; -+ if (next_len) { -+ if (buf_offs + next_len > c->leb_size) { -+ err = ubifs_update_one_lp(c, lnum, -+ c->leb_size - buf_offs, blen - used, -+ 0, 0); -+ if (err) -+ return err; -+ lnum = -1; -+ } -+ used -= blen; -+ if (used < 0) -+ used = 0; -+ avail = buf_len - used; -+ continue; -+ } -+ err = ubifs_update_one_lp(c, lnum, c->leb_size - buf_offs, -+ blen - used, 0, 0); -+ if (err) -+ return err; -+ break; -+ } -+ -+#ifdef CONFIG_UBIFS_FS_DEBUG -+ c->dbg->new_ihead_lnum = lnum; -+ c->dbg->new_ihead_offs = buf_offs; -+#endif -+ -+ return 0; -+} -+ -+/** -+ * layout_commit - determine positions of index nodes to commit. -+ * @c: UBIFS file-system description object -+ * @no_space: indicates that insufficient empty LEBs were allocated -+ * @cnt: number of znodes to commit -+ * -+ * Calculate and update the positions of index nodes to commit. 
If there were -+ * an insufficient number of empty LEBs allocated, then index nodes are placed -+ * into the gaps created by obsolete index nodes in non-empty index LEBs. For -+ * this purpose, an obsolete index node is one that was not in the index as at -+ * the end of the last commit. To write "in-the-gaps" requires that those index -+ * LEBs are updated atomically in-place. -+ */ -+static int layout_commit(struct ubifs_info *c, int no_space, int cnt) -+{ -+ int err; -+ -+ if (no_space) { -+ err = layout_in_gaps(c, cnt); -+ if (err) -+ return err; -+ } -+ err = layout_in_empty_space(c); -+ return err; -+} -+ -+/** -+ * find_first_dirty - find first dirty znode. -+ * @znode: znode to begin searching from -+ */ -+static struct ubifs_znode *find_first_dirty(struct ubifs_znode *znode) -+{ -+ int i, cont; -+ -+ if (!znode) -+ return NULL; -+ -+ while (1) { -+ if (znode->level == 0) { -+ if (ubifs_zn_dirty(znode)) -+ return znode; -+ return NULL; -+ } -+ cont = 0; -+ for (i = 0; i < znode->child_cnt; i++) { -+ struct ubifs_zbranch *zbr = &znode->zbranch[i]; -+ -+ if (zbr->znode && ubifs_zn_dirty(zbr->znode)) { -+ znode = zbr->znode; -+ cont = 1; -+ break; -+ } -+ } -+ if (!cont) { -+ if (ubifs_zn_dirty(znode)) -+ return znode; -+ return NULL; -+ } -+ } -+} -+ -+/** -+ * find_next_dirty - find next dirty znode. -+ * @znode: znode to begin searching from -+ */ -+static struct ubifs_znode *find_next_dirty(struct ubifs_znode *znode) -+{ -+ int n = znode->iip + 1; -+ -+ znode = znode->parent; -+ if (!znode) -+ return NULL; -+ for (; n < znode->child_cnt; n++) { -+ struct ubifs_zbranch *zbr = &znode->zbranch[n]; -+ -+ if (zbr->znode && ubifs_zn_dirty(zbr->znode)) -+ return find_first_dirty(zbr->znode); -+ } -+ return znode; -+} -+ -+/** -+ * get_znodes_to_commit - create list of dirty znodes to commit. -+ * @c: UBIFS file-system description object -+ * -+ * This function returns the number of znodes to commit. 
-+ */ -+static int get_znodes_to_commit(struct ubifs_info *c) -+{ -+ struct ubifs_znode *znode, *cnext; -+ int cnt = 0; -+ -+ c->cnext = find_first_dirty(c->zroot.znode); -+ znode = c->enext = c->cnext; -+ if (!znode) { -+ dbg_cmt("no znodes to commit"); -+ return 0; -+ } -+ cnt += 1; -+ while (1) { -+ ubifs_assert(!test_bit(COW_ZNODE, &znode->flags)); -+ __set_bit(COW_ZNODE, &znode->flags); -+ znode->alt = 0; -+ cnext = find_next_dirty(znode); -+ if (!cnext) { -+ znode->cnext = c->cnext; -+ break; -+ } -+ znode->cnext = cnext; -+ znode = cnext; -+ cnt += 1; -+ } -+ dbg_cmt("committing %d znodes", cnt); -+ ubifs_assert(cnt == atomic_long_read(&c->dirty_zn_cnt)); -+ return cnt; -+} -+ -+/** -+ * alloc_idx_lebs - allocate empty LEBs to be used to commit. -+ * @c: UBIFS file-system description object -+ * @cnt: number of znodes to commit -+ * -+ * This function returns %-ENOSPC if it cannot allocate a sufficient number of -+ * empty LEBs. %0 is returned on success, otherwise a negative error code -+ * is returned. -+ */ -+static int alloc_idx_lebs(struct ubifs_info *c, int cnt) -+{ -+ int i, leb_cnt, lnum; -+ -+ c->ileb_cnt = 0; -+ c->ileb_nxt = 0; -+ leb_cnt = get_leb_cnt(c, cnt); -+ dbg_cmt("need about %d empty LEBS for TNC commit", leb_cnt); -+ if (!leb_cnt) -+ return 0; -+ c->ilebs = kmalloc(leb_cnt * sizeof(int), GFP_NOFS); -+ if (!c->ilebs) -+ return -ENOMEM; -+ for (i = 0; i < leb_cnt; i++) { -+ lnum = ubifs_find_free_leb_for_idx(c); -+ if (lnum < 0) -+ return lnum; -+ c->ilebs[c->ileb_cnt++] = lnum; -+ dbg_cmt("LEB %d", lnum); -+ } -+ if (dbg_force_in_the_gaps()) -+ return -ENOSPC; -+ return 0; -+} -+ -+/** -+ * free_unused_idx_lebs - free unused LEBs that were allocated for the commit. -+ * @c: UBIFS file-system description object -+ * -+ * It is possible that we allocate more empty LEBs for the commit than we need. -+ * This functions frees the surplus. -+ * -+ * This function returns %0 on success and a negative error code on failure. 
-+ */ -+static int free_unused_idx_lebs(struct ubifs_info *c) -+{ -+ int i, err = 0, lnum, er; -+ -+ for (i = c->ileb_nxt; i < c->ileb_cnt; i++) { -+ lnum = c->ilebs[i]; -+ dbg_cmt("LEB %d", lnum); -+ er = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0, -+ LPROPS_INDEX | LPROPS_TAKEN, 0); -+ if (!err) -+ err = er; -+ } -+ return err; -+} -+ -+/** -+ * free_idx_lebs - free unused LEBs after commit end. -+ * @c: UBIFS file-system description object -+ * -+ * This function returns %0 on success and a negative error code on failure. -+ */ -+static int free_idx_lebs(struct ubifs_info *c) -+{ -+ int err; -+ -+ err = free_unused_idx_lebs(c); -+ kfree(c->ilebs); -+ c->ilebs = NULL; -+ return err; -+} -+ -+/** -+ * ubifs_tnc_start_commit - start TNC commit. -+ * @c: UBIFS file-system description object -+ * @zroot: new index root position is returned here -+ * -+ * This function prepares the list of indexing nodes to commit and lays out -+ * their positions on flash. If there is not enough free space it uses the -+ * in-gap commit method. Returns zero in case of success and a negative error -+ * code in case of failure. 
-+ */ -+int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot) -+{ -+ int err = 0, cnt; -+ -+ mutex_lock(&c->tnc_mutex); -+ err = dbg_check_tnc(c, 1); -+ if (err) -+ goto out; -+ cnt = get_znodes_to_commit(c); -+ if (cnt != 0) { -+ int no_space = 0; -+ -+ err = alloc_idx_lebs(c, cnt); -+ if (err == -ENOSPC) -+ no_space = 1; -+ else if (err) -+ goto out_free; -+ err = layout_commit(c, no_space, cnt); -+ if (err) -+ goto out_free; -+ ubifs_assert(atomic_long_read(&c->dirty_zn_cnt) == 0); -+ err = free_unused_idx_lebs(c); -+ if (err) -+ goto out; -+ } -+ destroy_old_idx(c); -+ memcpy(zroot, &c->zroot, sizeof(struct ubifs_zbranch)); -+ -+ err = ubifs_save_dirty_idx_lnums(c); -+ if (err) -+ goto out; -+ -+ spin_lock(&c->space_lock); -+ /* -+ * Although we have not finished committing yet, update size of the -+ * committed index ('c->old_idx_sz') and zero out the index growth -+ * budget. It is OK to do this now, because we've reserved all the -+ * space which is needed to commit the index, and it is save for the -+ * budgeting subsystem to assume the index is already committed, -+ * even though it is not. -+ */ -+ ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c)); -+ c->old_idx_sz = c->calc_idx_sz; -+ c->budg_uncommitted_idx = 0; -+ c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); -+ spin_unlock(&c->space_lock); -+ mutex_unlock(&c->tnc_mutex); -+ -+ dbg_cmt("number of index LEBs %d", c->lst.idx_lebs); -+ dbg_cmt("size of index %llu", c->calc_idx_sz); -+ return err; -+ -+out_free: -+ free_idx_lebs(c); -+out: -+ mutex_unlock(&c->tnc_mutex); -+ return err; -+} -+ -+/** -+ * write_index - write index nodes. -+ * @c: UBIFS file-system description object -+ * -+ * This function writes the index nodes whose positions were laid out in the -+ * layout_in_empty_space function. 
-+ */ -+static int write_index(struct ubifs_info *c) -+{ -+ struct ubifs_idx_node *idx; -+ struct ubifs_znode *znode, *cnext; -+ int i, lnum, offs, len, next_len, buf_len, buf_offs, used; -+ int avail, wlen, err, lnum_pos = 0; -+ -+ cnext = c->enext; -+ if (!cnext) -+ return 0; -+ -+ /* -+ * Always write index nodes to the index head so that index nodes and -+ * other types of nodes are never mixed in the same erase block. -+ */ -+ lnum = c->ihead_lnum; -+ buf_offs = c->ihead_offs; -+ -+ /* Allocate commit buffer */ -+ buf_len = ALIGN(c->max_idx_node_sz, c->min_io_size); -+ used = 0; -+ avail = buf_len; -+ -+ /* Ensure there is enough room for first write */ -+ next_len = ubifs_idx_node_sz(c, cnext->child_cnt); -+ if (buf_offs + next_len > c->leb_size) { -+ err = ubifs_update_one_lp(c, lnum, LPROPS_NC, 0, 0, -+ LPROPS_TAKEN); -+ if (err) -+ return err; -+ lnum = -1; -+ } -+ -+ while (1) { -+ cond_resched(); -+ -+ znode = cnext; -+ idx = c->cbuf + used; -+ -+ /* Make index node */ -+ idx->ch.node_type = UBIFS_IDX_NODE; -+ idx->child_cnt = cpu_to_le16(znode->child_cnt); -+ idx->level = cpu_to_le16(znode->level); -+ for (i = 0; i < znode->child_cnt; i++) { -+ struct ubifs_branch *br = ubifs_idx_branch(c, idx, i); -+ struct ubifs_zbranch *zbr = &znode->zbranch[i]; -+ -+ key_write_idx(c, &zbr->key, &br->key); -+ br->lnum = cpu_to_le32(zbr->lnum); -+ br->offs = cpu_to_le32(zbr->offs); -+ br->len = cpu_to_le32(zbr->len); -+ if (!zbr->lnum || !zbr->len) { -+ ubifs_err("bad ref in znode"); -+ dbg_dump_znode(c, znode); -+ if (zbr->znode) -+ dbg_dump_znode(c, zbr->znode); -+ } -+ } -+ len = ubifs_idx_node_sz(c, znode->child_cnt); -+ ubifs_prepare_node(c, idx, len, 0); -+ -+ /* Determine the index node position */ -+ if (lnum == -1) { -+ lnum = c->ilebs[lnum_pos++]; -+ buf_offs = 0; -+ used = 0; -+ avail = buf_len; -+ } -+ offs = buf_offs + used; -+ -+#ifdef CONFIG_UBIFS_FS_DEBUG -+ if (lnum != znode->lnum || offs != znode->offs || -+ len != znode->len) { -+ 
ubifs_err("inconsistent znode posn"); -+ return -EINVAL; -+ } -+#endif -+ -+ /* Grab some stuff from znode while we still can */ -+ cnext = znode->cnext; -+ -+ ubifs_assert(ubifs_zn_dirty(znode)); -+ ubifs_assert(test_bit(COW_ZNODE, &znode->flags)); -+ -+ /* -+ * It is important that other threads should see %DIRTY_ZNODE -+ * flag cleared before %COW_ZNODE. Specifically, it matters in -+ * the 'dirty_cow_znode()' function. This is the reason for the -+ * first barrier. Also, we want the bit changes to be seen to -+ * other threads ASAP, to avoid unnecesarry copying, which is -+ * the reason for the second barrier. -+ */ -+ clear_bit(DIRTY_ZNODE, &znode->flags); -+ smp_mb__before_clear_bit(); -+ clear_bit(COW_ZNODE, &znode->flags); -+ smp_mb__after_clear_bit(); -+ -+ /* Do not access znode from this point on */ -+ -+ /* Update buffer positions */ -+ wlen = used + len; -+ used += ALIGN(len, 8); -+ avail -= ALIGN(len, 8); -+ -+ /* -+ * Calculate the next index node length to see if there is -+ * enough room for it -+ */ -+ if (cnext == c->cnext) -+ next_len = 0; -+ else -+ next_len = ubifs_idx_node_sz(c, cnext->child_cnt); -+ -+ if (c->min_io_size == 1) { -+ /* -+ * Write the prepared index node immediately if there is -+ * no minimum IO size -+ */ -+ err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs, -+ wlen, UBI_SHORTTERM); -+ if (err) -+ return err; -+ buf_offs += ALIGN(wlen, 8); -+ if (next_len) { -+ used = 0; -+ avail = buf_len; -+ if (buf_offs + next_len > c->leb_size) { -+ err = ubifs_update_one_lp(c, lnum, -+ LPROPS_NC, 0, 0, LPROPS_TAKEN); -+ if (err) -+ return err; -+ lnum = -1; -+ } -+ continue; -+ } -+ } else { -+ int blen, nxt_offs = buf_offs + used + next_len; -+ -+ if (next_len && nxt_offs <= c->leb_size) { -+ if (avail > 0) -+ continue; -+ else -+ blen = buf_len; -+ } else { -+ wlen = ALIGN(wlen, 8); -+ blen = ALIGN(wlen, c->min_io_size); -+ ubifs_pad(c, c->cbuf + wlen, blen - wlen); -+ } -+ /* -+ * The buffer is full or there are no more znodes -+ * to 
do -+ */ -+ err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs, -+ blen, UBI_SHORTTERM); -+ if (err) -+ return err; -+ buf_offs += blen; -+ if (next_len) { -+ if (nxt_offs > c->leb_size) { -+ err = ubifs_update_one_lp(c, lnum, -+ LPROPS_NC, 0, 0, LPROPS_TAKEN); -+ if (err) -+ return err; -+ lnum = -1; -+ } -+ used -= blen; -+ if (used < 0) -+ used = 0; -+ avail = buf_len - used; -+ memmove(c->cbuf, c->cbuf + blen, used); -+ continue; -+ } -+ } -+ break; -+ } -+ -+#ifdef CONFIG_UBIFS_FS_DEBUG -+ if (lnum != c->dbg->new_ihead_lnum || -+ buf_offs != c->dbg->new_ihead_offs) { -+ ubifs_err("inconsistent ihead"); -+ return -EINVAL; -+ } -+#endif -+ -+ c->ihead_lnum = lnum; -+ c->ihead_offs = buf_offs; -+ -+ return 0; -+} -+ -+/** -+ * free_obsolete_znodes - free obsolete znodes. -+ * @c: UBIFS file-system description object -+ * -+ * At the end of commit end, obsolete znodes are freed. -+ */ -+static void free_obsolete_znodes(struct ubifs_info *c) -+{ -+ struct ubifs_znode *znode, *cnext; -+ -+ cnext = c->cnext; -+ do { -+ znode = cnext; -+ cnext = znode->cnext; -+ if (test_bit(OBSOLETE_ZNODE, &znode->flags)) -+ kfree(znode); -+ else { -+ znode->cnext = NULL; -+ atomic_long_inc(&c->clean_zn_cnt); -+ atomic_long_inc(&ubifs_clean_zn_cnt); -+ } -+ } while (cnext != c->cnext); -+} -+ -+/** -+ * return_gap_lebs - return LEBs used by the in-gap commit method. -+ * @c: UBIFS file-system description object -+ * -+ * This function clears the "taken" flag for the LEBs which were used by the -+ * "commit in-the-gaps" method. -+ */ -+static int return_gap_lebs(struct ubifs_info *c) -+{ -+ int *p, err; -+ -+ if (!c->gap_lebs) -+ return 0; -+ -+ dbg_cmt(""); -+ for (p = c->gap_lebs; *p != -1; p++) { -+ err = ubifs_change_one_lp(c, *p, LPROPS_NC, LPROPS_NC, 0, -+ LPROPS_TAKEN, 0); -+ if (err) -+ return err; -+ } -+ -+ kfree(c->gap_lebs); -+ c->gap_lebs = NULL; -+ return 0; -+} -+ -+/** -+ * ubifs_tnc_end_commit - update the TNC for commit end. 
-+ * @c: UBIFS file-system description object -+ * -+ * Write the dirty znodes. -+ */ -+int ubifs_tnc_end_commit(struct ubifs_info *c) -+{ -+ int err; -+ -+ if (!c->cnext) -+ return 0; -+ -+ err = return_gap_lebs(c); -+ if (err) -+ return err; -+ -+ err = write_index(c); -+ if (err) -+ return err; -+ -+ mutex_lock(&c->tnc_mutex); -+ -+ dbg_cmt("TNC height is %d", c->zroot.znode->level + 1); -+ -+ free_obsolete_znodes(c); -+ -+ c->cnext = NULL; -+ kfree(c->ilebs); -+ c->ilebs = NULL; -+ -+ mutex_unlock(&c->tnc_mutex); -+ -+ return 0; -+} -diff -Nurd linux-2.6.24.orig/fs/ubifs/tnc_misc.c linux-2.6.24/fs/ubifs/tnc_misc.c ---- linux-2.6.24.orig/fs/ubifs/tnc_misc.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.24/fs/ubifs/tnc_misc.c 2009-04-17 09:49:28.000000000 +0200 -@@ -0,0 +1,494 @@ -+/* -+ * This file is part of UBIFS. -+ * -+ * Copyright (C) 2006-2008 Nokia Corporation. -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 as published by -+ * the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -+ * more details. -+ * -+ * You should have received a copy of the GNU General Public License along with -+ * this program; if not, write to the Free Software Foundation, Inc., 51 -+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ * -+ * Authors: Adrian Hunter -+ * Artem Bityutskiy (Битюцкий Артём) -+ */ -+ -+/* -+ * This file contains miscelanious TNC-related functions shared betweend -+ * different files. This file does not form any logically separate TNC -+ * sub-system. The file was created because there is a lot of TNC code and -+ * putting it all in one file would make that file too big and unreadable. 
-+ */ -+ -+#include "ubifs.h" -+ -+/** -+ * ubifs_tnc_levelorder_next - next TNC tree element in levelorder traversal. -+ * @zr: root of the subtree to traverse -+ * @znode: previous znode -+ * -+ * This function implements levelorder TNC traversal. The LNC is ignored. -+ * Returns the next element or %NULL if @znode is already the last one. -+ */ -+struct ubifs_znode *ubifs_tnc_levelorder_next(struct ubifs_znode *zr, -+ struct ubifs_znode *znode) -+{ -+ int level, iip, level_search = 0; -+ struct ubifs_znode *zn; -+ -+ ubifs_assert(zr); -+ -+ if (unlikely(!znode)) -+ return zr; -+ -+ if (unlikely(znode == zr)) { -+ if (znode->level == 0) -+ return NULL; -+ return ubifs_tnc_find_child(zr, 0); -+ } -+ -+ level = znode->level; -+ -+ iip = znode->iip; -+ while (1) { -+ ubifs_assert(znode->level <= zr->level); -+ -+ /* -+ * First walk up until there is a znode with next branch to -+ * look at. -+ */ -+ while (znode->parent != zr && iip >= znode->parent->child_cnt) { -+ znode = znode->parent; -+ iip = znode->iip; -+ } -+ -+ if (unlikely(znode->parent == zr && -+ iip >= znode->parent->child_cnt)) { -+ /* This level is done, switch to the lower one */ -+ level -= 1; -+ if (level_search || level < 0) -+ /* -+ * We were already looking for znode at lower -+ * level ('level_search'). As we are here -+ * again, it just does not exist. Or all levels -+ * were finished ('level < 0'). -+ */ -+ return NULL; -+ -+ level_search = 1; -+ iip = -1; -+ znode = ubifs_tnc_find_child(zr, 0); -+ ubifs_assert(znode); -+ } -+ -+ /* Switch to the next index */ -+ zn = ubifs_tnc_find_child(znode->parent, iip + 1); -+ if (!zn) { -+ /* No more children to look at, we have walk up */ -+ iip = znode->parent->child_cnt; -+ continue; -+ } -+ -+ /* Walk back down to the level we came from ('level') */ -+ while (zn->level != level) { -+ znode = zn; -+ zn = ubifs_tnc_find_child(zn, 0); -+ if (!zn) { -+ /* -+ * This path is not too deep so it does not -+ * reach 'level'. Try next path. 
-+ */ -+ iip = znode->iip; -+ break; -+ } -+ } -+ -+ if (zn) { -+ ubifs_assert(zn->level >= 0); -+ return zn; -+ } -+ } -+} -+ -+/** -+ * ubifs_search_zbranch - search znode branch. -+ * @c: UBIFS file-system description object -+ * @znode: znode to search in -+ * @key: key to search for -+ * @n: znode branch slot number is returned here -+ * -+ * This is a helper function which search branch with key @key in @znode using -+ * binary search. The result of the search may be: -+ * o exact match, then %1 is returned, and the slot number of the branch is -+ * stored in @n; -+ * o no exact match, then %0 is returned and the slot number of the left -+ * closest branch is returned in @n; the slot if all keys in this znode are -+ * greater than @key, then %-1 is returned in @n. -+ */ -+int ubifs_search_zbranch(const struct ubifs_info *c, -+ const struct ubifs_znode *znode, -+ const union ubifs_key *key, int *n) -+{ -+ int beg = 0, end = znode->child_cnt, uninitialized_var(mid); -+ int uninitialized_var(cmp); -+ const struct ubifs_zbranch *zbr = &znode->zbranch[0]; -+ -+ ubifs_assert(end > beg); -+ -+ while (end > beg) { -+ mid = (beg + end) >> 1; -+ cmp = keys_cmp(c, key, &zbr[mid].key); -+ if (cmp > 0) -+ beg = mid + 1; -+ else if (cmp < 0) -+ end = mid; -+ else { -+ *n = mid; -+ return 1; -+ } -+ } -+ -+ *n = end - 1; -+ -+ /* The insert point is after *n */ -+ ubifs_assert(*n >= -1 && *n < znode->child_cnt); -+ if (*n == -1) -+ ubifs_assert(keys_cmp(c, key, &zbr[0].key) < 0); -+ else -+ ubifs_assert(keys_cmp(c, key, &zbr[*n].key) > 0); -+ if (*n + 1 < znode->child_cnt) -+ ubifs_assert(keys_cmp(c, key, &zbr[*n + 1].key) < 0); -+ -+ return 0; -+} -+ -+/** -+ * ubifs_tnc_postorder_first - find first znode to do postorder tree traversal. -+ * @znode: znode to start at (root of the sub-tree to traverse) -+ * -+ * Find the lowest leftmost znode in a subtree of the TNC tree. The LNC is -+ * ignored. 
-+ */ -+struct ubifs_znode *ubifs_tnc_postorder_first(struct ubifs_znode *znode) -+{ -+ if (unlikely(!znode)) -+ return NULL; -+ -+ while (znode->level > 0) { -+ struct ubifs_znode *child; -+ -+ child = ubifs_tnc_find_child(znode, 0); -+ if (!child) -+ return znode; -+ znode = child; -+ } -+ -+ return znode; -+} -+ -+/** -+ * ubifs_tnc_postorder_next - next TNC tree element in postorder traversal. -+ * @znode: previous znode -+ * -+ * This function implements postorder TNC traversal. The LNC is ignored. -+ * Returns the next element or %NULL if @znode is already the last one. -+ */ -+struct ubifs_znode *ubifs_tnc_postorder_next(struct ubifs_znode *znode) -+{ -+ struct ubifs_znode *zn; -+ -+ ubifs_assert(znode); -+ if (unlikely(!znode->parent)) -+ return NULL; -+ -+ /* Switch to the next index in the parent */ -+ zn = ubifs_tnc_find_child(znode->parent, znode->iip + 1); -+ if (!zn) -+ /* This is in fact the last child, return parent */ -+ return znode->parent; -+ -+ /* Go to the first znode in this new subtree */ -+ return ubifs_tnc_postorder_first(zn); -+} -+ -+/** -+ * ubifs_destroy_tnc_subtree - destroy all znodes connected to a subtree. -+ * @znode: znode defining subtree to destroy -+ * -+ * This function destroys subtree of the TNC tree. Returns number of clean -+ * znodes in the subtree. -+ */ -+long ubifs_destroy_tnc_subtree(struct ubifs_znode *znode) -+{ -+ struct ubifs_znode *zn = ubifs_tnc_postorder_first(znode); -+ long clean_freed = 0; -+ int n; -+ -+ ubifs_assert(zn); -+ while (1) { -+ for (n = 0; n < zn->child_cnt; n++) { -+ if (!zn->zbranch[n].znode) -+ continue; -+ -+ if (zn->level > 0 && -+ !ubifs_zn_dirty(zn->zbranch[n].znode)) -+ clean_freed += 1; -+ -+ cond_resched(); -+ kfree(zn->zbranch[n].znode); -+ } -+ -+ if (zn == znode) { -+ if (!ubifs_zn_dirty(zn)) -+ clean_freed += 1; -+ kfree(zn); -+ return clean_freed; -+ } -+ -+ zn = ubifs_tnc_postorder_next(zn); -+ } -+} -+ -+/** -+ * read_znode - read an indexing node from flash and fill znode. 
-+ * @c: UBIFS file-system description object -+ * @lnum: LEB of the indexing node to read -+ * @offs: node offset -+ * @len: node length -+ * @znode: znode to read to -+ * -+ * This function reads an indexing node from the flash media and fills znode -+ * with the read data. Returns zero in case of success and a negative error -+ * code in case of failure. The read indexing node is validated and if anything -+ * is wrong with it, this function prints complaint messages and returns -+ * %-EINVAL. -+ */ -+static int read_znode(struct ubifs_info *c, int lnum, int offs, int len, -+ struct ubifs_znode *znode) -+{ -+ int i, err, type, cmp; -+ struct ubifs_idx_node *idx; -+ -+ idx = kmalloc(c->max_idx_node_sz, GFP_NOFS); -+ if (!idx) -+ return -ENOMEM; -+ -+ err = ubifs_read_node(c, idx, UBIFS_IDX_NODE, len, lnum, offs); -+ if (err < 0) { -+ kfree(idx); -+ return err; -+ } -+ -+ znode->child_cnt = le16_to_cpu(idx->child_cnt); -+ znode->level = le16_to_cpu(idx->level); -+ -+ dbg_tnc("LEB %d:%d, level %d, %d branch", -+ lnum, offs, znode->level, znode->child_cnt); -+ -+ if (znode->child_cnt > c->fanout || znode->level > UBIFS_MAX_LEVELS) { -+ dbg_err("current fanout %d, branch count %d", -+ c->fanout, znode->child_cnt); -+ dbg_err("max levels %d, znode level %d", -+ UBIFS_MAX_LEVELS, znode->level); -+ err = 1; -+ goto out_dump; -+ } -+ -+ for (i = 0; i < znode->child_cnt; i++) { -+ const struct ubifs_branch *br = ubifs_idx_branch(c, idx, i); -+ struct ubifs_zbranch *zbr = &znode->zbranch[i]; -+ -+ key_read(c, &br->key, &zbr->key); -+ zbr->lnum = le32_to_cpu(br->lnum); -+ zbr->offs = le32_to_cpu(br->offs); -+ zbr->len = le32_to_cpu(br->len); -+ zbr->znode = NULL; -+ -+ /* Validate branch */ -+ -+ if (zbr->lnum < c->main_first || -+ zbr->lnum >= c->leb_cnt || zbr->offs < 0 || -+ zbr->offs + zbr->len > c->leb_size || zbr->offs & 7) { -+ dbg_err("bad branch %d", i); -+ err = 2; -+ goto out_dump; -+ } -+ -+ switch (key_type(c, &zbr->key)) { -+ case UBIFS_INO_KEY: -+ case 
UBIFS_DATA_KEY: -+ case UBIFS_DENT_KEY: -+ case UBIFS_XENT_KEY: -+ break; -+ default: -+ dbg_msg("bad key type at slot %d: %s", i, -+ DBGKEY(&zbr->key)); -+ err = 3; -+ goto out_dump; -+ } -+ -+ if (znode->level) -+ continue; -+ -+ type = key_type(c, &zbr->key); -+ if (c->ranges[type].max_len == 0) { -+ if (zbr->len != c->ranges[type].len) { -+ dbg_err("bad target node (type %d) length (%d)", -+ type, zbr->len); -+ dbg_err("have to be %d", c->ranges[type].len); -+ err = 4; -+ goto out_dump; -+ } -+ } else if (zbr->len < c->ranges[type].min_len || -+ zbr->len > c->ranges[type].max_len) { -+ dbg_err("bad target node (type %d) length (%d)", -+ type, zbr->len); -+ dbg_err("have to be in range of %d-%d", -+ c->ranges[type].min_len, -+ c->ranges[type].max_len); -+ err = 5; -+ goto out_dump; -+ } -+ } -+ -+ /* -+ * Ensure that the next key is greater or equivalent to the -+ * previous one. -+ */ -+ for (i = 0; i < znode->child_cnt - 1; i++) { -+ const union ubifs_key *key1, *key2; -+ -+ key1 = &znode->zbranch[i].key; -+ key2 = &znode->zbranch[i + 1].key; -+ -+ cmp = keys_cmp(c, key1, key2); -+ if (cmp > 0) { -+ dbg_err("bad key order (keys %d and %d)", i, i + 1); -+ err = 6; -+ goto out_dump; -+ } else if (cmp == 0 && !is_hash_key(c, key1)) { -+ /* These can only be keys with colliding hash */ -+ dbg_err("keys %d and %d are not hashed but equivalent", -+ i, i + 1); -+ err = 7; -+ goto out_dump; -+ } -+ } -+ -+ kfree(idx); -+ return 0; -+ -+out_dump: -+ ubifs_err("bad indexing node at LEB %d:%d, error %d", lnum, offs, err); -+ dbg_dump_node(c, idx); -+ kfree(idx); -+ return -EINVAL; -+} -+ -+/** -+ * ubifs_load_znode - load znode to TNC cache. -+ * @c: UBIFS file-system description object -+ * @zbr: znode branch -+ * @parent: znode's parent -+ * @iip: index in parent -+ * -+ * This function loads znode pointed to by @zbr into the TNC cache and -+ * returns pointer to it in case of success and a negative error code in case -+ * of failure. 
-+ */ -+struct ubifs_znode *ubifs_load_znode(struct ubifs_info *c, -+ struct ubifs_zbranch *zbr, -+ struct ubifs_znode *parent, int iip) -+{ -+ int err; -+ struct ubifs_znode *znode; -+ -+ ubifs_assert(!zbr->znode); -+ /* -+ * A slab cache is not presently used for znodes because the znode size -+ * depends on the fanout which is stored in the superblock. -+ */ -+ znode = kzalloc(c->max_znode_sz, GFP_NOFS); -+ if (!znode) -+ return ERR_PTR(-ENOMEM); -+ -+ err = read_znode(c, zbr->lnum, zbr->offs, zbr->len, znode); -+ if (err) -+ goto out; -+ -+ atomic_long_inc(&c->clean_zn_cnt); -+ -+ /* -+ * Increment the global clean znode counter as well. It is OK that -+ * global and per-FS clean znode counters may be inconsistent for some -+ * short time (because we might be preempted at this point), the global -+ * one is only used in shrinker. -+ */ -+ atomic_long_inc(&ubifs_clean_zn_cnt); -+ -+ zbr->znode = znode; -+ znode->parent = parent; -+ znode->time = get_seconds(); -+ znode->iip = iip; -+ -+ return znode; -+ -+out: -+ kfree(znode); -+ return ERR_PTR(err); -+} -+ -+/** -+ * ubifs_tnc_read_node - read a leaf node from the flash media. -+ * @c: UBIFS file-system description object -+ * @zbr: key and position of the node -+ * @node: node is returned here -+ * -+ * This function reads a node defined by @zbr from the flash media. Returns -+ * zero in case of success or a negative negative error code in case of -+ * failure. -+ */ -+int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr, -+ void *node) -+{ -+ union ubifs_key key1, *key = &zbr->key; -+ int err, type = key_type(c, key); -+ struct ubifs_wbuf *wbuf; -+ -+ /* -+ * 'zbr' has to point to on-flash node. The node may sit in a bud and -+ * may even be in a write buffer, so we have to take care about this. 
-+ */ -+ wbuf = ubifs_get_wbuf(c, zbr->lnum); -+ if (wbuf) -+ err = ubifs_read_node_wbuf(wbuf, node, type, zbr->len, -+ zbr->lnum, zbr->offs); -+ else -+ err = ubifs_read_node(c, node, type, zbr->len, zbr->lnum, -+ zbr->offs); -+ -+ if (err) { -+ dbg_tnc("key %s", DBGKEY(key)); -+ return err; -+ } -+ -+ /* Make sure the key of the read node is correct */ -+ key_read(c, node + UBIFS_KEY_OFFSET, &key1); -+ if (!keys_eq(c, key, &key1)) { -+ ubifs_err("bad key in node at LEB %d:%d", -+ zbr->lnum, zbr->offs); -+ dbg_tnc("looked for key %s found node's key %s", -+ DBGKEY(key), DBGKEY1(&key1)); -+ dbg_dump_node(c, node); -+ return -EINVAL; -+ } -+ -+ return 0; -+} -diff -Nurd linux-2.6.24.orig/fs/ubifs/ubifs-media.h linux-2.6.24/fs/ubifs/ubifs-media.h ---- linux-2.6.24.orig/fs/ubifs/ubifs-media.h 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.24/fs/ubifs/ubifs-media.h 2009-04-17 09:49:28.000000000 +0200 -@@ -0,0 +1,751 @@ -+/* -+ * This file is part of UBIFS. -+ * -+ * Copyright (C) 2006-2008 Nokia Corporation. -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 as published by -+ * the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -+ * more details. -+ * -+ * You should have received a copy of the GNU General Public License along with -+ * this program; if not, write to the Free Software Foundation, Inc., 51 -+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ * -+ * Authors: Artem Bityutskiy (Битюцкий Артём) -+ * Adrian Hunter -+ */ -+ -+/* -+ * This file describes UBIFS on-flash format and contains definitions of all the -+ * relevant data structures and constants. -+ * -+ * All UBIFS on-flash objects are stored in the form of nodes. 
All nodes start -+ * with the UBIFS node magic number and have the same common header. Nodes -+ * always sit at 8-byte aligned positions on the media and node header sizes are -+ * also 8-byte aligned (except for the indexing node and the padding node). -+ */ -+ -+#ifndef __UBIFS_MEDIA_H__ -+#define __UBIFS_MEDIA_H__ -+ -+/* UBIFS node magic number (must not have the padding byte first or last) */ -+#define UBIFS_NODE_MAGIC 0x06101831 -+ -+/* UBIFS on-flash format version */ -+#define UBIFS_FORMAT_VERSION 4 -+ -+/* Minimum logical eraseblock size in bytes */ -+#define UBIFS_MIN_LEB_SZ (15*1024) -+ -+/* Initial CRC32 value used when calculating CRC checksums */ -+#define UBIFS_CRC32_INIT 0xFFFFFFFFU -+ -+/* -+ * UBIFS does not try to compress data if its length is less than the below -+ * constant. -+ */ -+#define UBIFS_MIN_COMPR_LEN 128 -+ -+/* -+ * If compressed data length is less than %UBIFS_MIN_COMPRESS_DIFF bytes -+ * shorter than uncompressed data length, UBIFS preferes to leave this data -+ * node uncompress, because it'll be read faster. -+ */ -+#define UBIFS_MIN_COMPRESS_DIFF 64 -+ -+/* Root inode number */ -+#define UBIFS_ROOT_INO 1 -+ -+/* Lowest inode number used for regular inodes (not UBIFS-only internal ones) */ -+#define UBIFS_FIRST_INO 64 -+ -+/* -+ * Maximum file name and extended attribute length (must be a multiple of 8, -+ * minus 1). -+ */ -+#define UBIFS_MAX_NLEN 255 -+ -+/* Maximum number of data journal heads */ -+#define UBIFS_MAX_JHEADS 1 -+ -+/* -+ * Size of UBIFS data block. Note, UBIFS is not a block oriented file-system, -+ * which means that it does not treat the underlying media as consisting of -+ * blocks like in case of hard drives. Do not be confused. UBIFS block is just -+ * the maximum amount of data which one data node can have or which can be -+ * attached to an inode node. 
-+ */ -+#define UBIFS_BLOCK_SIZE 4096 -+#define UBIFS_BLOCK_SHIFT 12 -+ -+/* UBIFS padding byte pattern (must not be first or last byte of node magic) */ -+#define UBIFS_PADDING_BYTE 0xCE -+ -+/* Maximum possible key length */ -+#define UBIFS_MAX_KEY_LEN 16 -+ -+/* Key length ("simple" format) */ -+#define UBIFS_SK_LEN 8 -+ -+/* Minimum index tree fanout */ -+#define UBIFS_MIN_FANOUT 3 -+ -+/* Maximum number of levels in UBIFS indexing B-tree */ -+#define UBIFS_MAX_LEVELS 512 -+ -+/* Maximum amount of data attached to an inode in bytes */ -+#define UBIFS_MAX_INO_DATA UBIFS_BLOCK_SIZE -+ -+/* LEB Properties Tree fanout (must be power of 2) and fanout shift */ -+#define UBIFS_LPT_FANOUT 4 -+#define UBIFS_LPT_FANOUT_SHIFT 2 -+ -+/* LEB Properties Tree bit field sizes */ -+#define UBIFS_LPT_CRC_BITS 16 -+#define UBIFS_LPT_CRC_BYTES 2 -+#define UBIFS_LPT_TYPE_BITS 4 -+ -+/* The key is always at the same position in all keyed nodes */ -+#define UBIFS_KEY_OFFSET offsetof(struct ubifs_ino_node, key) -+ -+/* -+ * LEB Properties Tree node types. -+ * -+ * UBIFS_LPT_PNODE: LPT leaf node (contains LEB properties) -+ * UBIFS_LPT_NNODE: LPT internal node -+ * UBIFS_LPT_LTAB: LPT's own lprops table -+ * UBIFS_LPT_LSAVE: LPT's save table (big model only) -+ * UBIFS_LPT_NODE_CNT: count of LPT node types -+ * UBIFS_LPT_NOT_A_NODE: all ones (15 for 4 bits) is never a valid node type -+ */ -+enum { -+ UBIFS_LPT_PNODE, -+ UBIFS_LPT_NNODE, -+ UBIFS_LPT_LTAB, -+ UBIFS_LPT_LSAVE, -+ UBIFS_LPT_NODE_CNT, -+ UBIFS_LPT_NOT_A_NODE = (1 << UBIFS_LPT_TYPE_BITS) - 1, -+}; -+ -+/* -+ * UBIFS inode types. 
-+ * -+ * UBIFS_ITYPE_REG: regular file -+ * UBIFS_ITYPE_DIR: directory -+ * UBIFS_ITYPE_LNK: soft link -+ * UBIFS_ITYPE_BLK: block device node -+ * UBIFS_ITYPE_CHR: character device node -+ * UBIFS_ITYPE_FIFO: fifo -+ * UBIFS_ITYPE_SOCK: socket -+ * UBIFS_ITYPES_CNT: count of supported file types -+ */ -+enum { -+ UBIFS_ITYPE_REG, -+ UBIFS_ITYPE_DIR, -+ UBIFS_ITYPE_LNK, -+ UBIFS_ITYPE_BLK, -+ UBIFS_ITYPE_CHR, -+ UBIFS_ITYPE_FIFO, -+ UBIFS_ITYPE_SOCK, -+ UBIFS_ITYPES_CNT, -+}; -+ -+/* -+ * Supported key hash functions. -+ * -+ * UBIFS_KEY_HASH_R5: R5 hash -+ * UBIFS_KEY_HASH_TEST: test hash which just returns first 4 bytes of the name -+ */ -+enum { -+ UBIFS_KEY_HASH_R5, -+ UBIFS_KEY_HASH_TEST, -+}; -+ -+/* -+ * Supported key formats. -+ * -+ * UBIFS_SIMPLE_KEY_FMT: simple key format -+ */ -+enum { -+ UBIFS_SIMPLE_KEY_FMT, -+}; -+ -+/* -+ * The simple key format uses 29 bits for storing UBIFS block number and hash -+ * value. -+ */ -+#define UBIFS_S_KEY_BLOCK_BITS 29 -+#define UBIFS_S_KEY_BLOCK_MASK 0x1FFFFFFF -+#define UBIFS_S_KEY_HASH_BITS UBIFS_S_KEY_BLOCK_BITS -+#define UBIFS_S_KEY_HASH_MASK UBIFS_S_KEY_BLOCK_MASK -+ -+/* -+ * Key types. 
-+ * -+ * UBIFS_INO_KEY: inode node key -+ * UBIFS_DATA_KEY: data node key -+ * UBIFS_DENT_KEY: directory entry node key -+ * UBIFS_XENT_KEY: extended attribute entry key -+ * UBIFS_KEY_TYPES_CNT: number of supported key types -+ */ -+enum { -+ UBIFS_INO_KEY, -+ UBIFS_DATA_KEY, -+ UBIFS_DENT_KEY, -+ UBIFS_XENT_KEY, -+ UBIFS_KEY_TYPES_CNT, -+}; -+ -+/* Count of LEBs reserved for the superblock area */ -+#define UBIFS_SB_LEBS 1 -+/* Count of LEBs reserved for the master area */ -+#define UBIFS_MST_LEBS 2 -+ -+/* First LEB of the superblock area */ -+#define UBIFS_SB_LNUM 0 -+/* First LEB of the master area */ -+#define UBIFS_MST_LNUM (UBIFS_SB_LNUM + UBIFS_SB_LEBS) -+/* First LEB of the log area */ -+#define UBIFS_LOG_LNUM (UBIFS_MST_LNUM + UBIFS_MST_LEBS) -+ -+/* -+ * The below constants define the absolute minimum values for various UBIFS -+ * media areas. Many of them actually depend of flash geometry and the FS -+ * configuration (number of journal heads, orphan LEBs, etc). This means that -+ * the smallest volume size which can be used for UBIFS cannot be pre-defined -+ * by these constants. The file-system that meets the below limitation will not -+ * necessarily mount. UBIFS does run-time calculations and validates the FS -+ * size. -+ */ -+ -+/* Minimum number of logical eraseblocks in the log */ -+#define UBIFS_MIN_LOG_LEBS 2 -+/* Minimum number of bud logical eraseblocks (one for each head) */ -+#define UBIFS_MIN_BUD_LEBS 3 -+/* Minimum number of journal logical eraseblocks */ -+#define UBIFS_MIN_JNL_LEBS (UBIFS_MIN_LOG_LEBS + UBIFS_MIN_BUD_LEBS) -+/* Minimum number of LPT area logical eraseblocks */ -+#define UBIFS_MIN_LPT_LEBS 2 -+/* Minimum number of orphan area logical eraseblocks */ -+#define UBIFS_MIN_ORPH_LEBS 1 -+/* -+ * Minimum number of main area logical eraseblocks (buds, 3 for the index, 1 -+ * for GC, 1 for deletions, and at least 1 for committed data). 
-+ */ -+#define UBIFS_MIN_MAIN_LEBS (UBIFS_MIN_BUD_LEBS + 6) -+ -+/* Minimum number of logical eraseblocks */ -+#define UBIFS_MIN_LEB_CNT (UBIFS_SB_LEBS + UBIFS_MST_LEBS + \ -+ UBIFS_MIN_LOG_LEBS + UBIFS_MIN_LPT_LEBS + \ -+ UBIFS_MIN_ORPH_LEBS + UBIFS_MIN_MAIN_LEBS) -+ -+/* Node sizes (N.B. these are guaranteed to be multiples of 8) */ -+#define UBIFS_CH_SZ sizeof(struct ubifs_ch) -+#define UBIFS_INO_NODE_SZ sizeof(struct ubifs_ino_node) -+#define UBIFS_DATA_NODE_SZ sizeof(struct ubifs_data_node) -+#define UBIFS_DENT_NODE_SZ sizeof(struct ubifs_dent_node) -+#define UBIFS_TRUN_NODE_SZ sizeof(struct ubifs_trun_node) -+#define UBIFS_PAD_NODE_SZ sizeof(struct ubifs_pad_node) -+#define UBIFS_SB_NODE_SZ sizeof(struct ubifs_sb_node) -+#define UBIFS_MST_NODE_SZ sizeof(struct ubifs_mst_node) -+#define UBIFS_REF_NODE_SZ sizeof(struct ubifs_ref_node) -+#define UBIFS_IDX_NODE_SZ sizeof(struct ubifs_idx_node) -+#define UBIFS_CS_NODE_SZ sizeof(struct ubifs_cs_node) -+#define UBIFS_ORPH_NODE_SZ sizeof(struct ubifs_orph_node) -+/* Extended attribute entry nodes are identical to directory entry nodes */ -+#define UBIFS_XENT_NODE_SZ UBIFS_DENT_NODE_SZ -+/* Only this does not have to be multiple of 8 bytes */ -+#define UBIFS_BRANCH_SZ sizeof(struct ubifs_branch) -+ -+/* Maximum node sizes (N.B. these are guaranteed to be multiples of 8) */ -+#define UBIFS_MAX_DATA_NODE_SZ (UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE) -+#define UBIFS_MAX_INO_NODE_SZ (UBIFS_INO_NODE_SZ + UBIFS_MAX_INO_DATA) -+#define UBIFS_MAX_DENT_NODE_SZ (UBIFS_DENT_NODE_SZ + UBIFS_MAX_NLEN + 1) -+#define UBIFS_MAX_XENT_NODE_SZ UBIFS_MAX_DENT_NODE_SZ -+ -+/* The largest UBIFS node */ -+#define UBIFS_MAX_NODE_SZ UBIFS_MAX_INO_NODE_SZ -+ -+/* -+ * On-flash inode flags. 
-+ * -+ * UBIFS_COMPR_FL: use compression for this inode -+ * UBIFS_SYNC_FL: I/O on this inode has to be synchronous -+ * UBIFS_IMMUTABLE_FL: inode is immutable -+ * UBIFS_APPEND_FL: writes to the inode may only append data -+ * UBIFS_DIRSYNC_FL: I/O on this directory inode has to be synchronous -+ * UBIFS_XATTR_FL: this inode is the inode for an extended attribute value -+ * -+ * Note, these are on-flash flags which correspond to ioctl flags -+ * (@FS_COMPR_FL, etc). They have the same values now, but generally, do not -+ * have to be the same. -+ */ -+enum { -+ UBIFS_COMPR_FL = 0x01, -+ UBIFS_SYNC_FL = 0x02, -+ UBIFS_IMMUTABLE_FL = 0x04, -+ UBIFS_APPEND_FL = 0x08, -+ UBIFS_DIRSYNC_FL = 0x10, -+ UBIFS_XATTR_FL = 0x20, -+}; -+ -+/* Inode flag bits used by UBIFS */ -+#define UBIFS_FL_MASK 0x0000001F -+ -+/* -+ * UBIFS compression types. -+ * -+ * UBIFS_COMPR_NONE: no compression -+ * UBIFS_COMPR_LZO: LZO compression -+ * UBIFS_COMPR_ZLIB: ZLIB compression -+ * UBIFS_COMPR_TYPES_CNT: count of supported compression types -+ */ -+enum { -+ UBIFS_COMPR_NONE, -+ UBIFS_COMPR_LZO, -+ UBIFS_COMPR_ZLIB, -+ UBIFS_COMPR_TYPES_CNT, -+}; -+ -+/* -+ * UBIFS node types. -+ * -+ * UBIFS_INO_NODE: inode node -+ * UBIFS_DATA_NODE: data node -+ * UBIFS_DENT_NODE: directory entry node -+ * UBIFS_XENT_NODE: extended attribute node -+ * UBIFS_TRUN_NODE: truncation node -+ * UBIFS_PAD_NODE: padding node -+ * UBIFS_SB_NODE: superblock node -+ * UBIFS_MST_NODE: master node -+ * UBIFS_REF_NODE: LEB reference node -+ * UBIFS_IDX_NODE: index node -+ * UBIFS_CS_NODE: commit start node -+ * UBIFS_ORPH_NODE: orphan node -+ * UBIFS_NODE_TYPES_CNT: count of supported node types -+ * -+ * Note, we index arrays by these numbers, so keep them low and contiguous. -+ * Node type constants for inodes, direntries and so on have to be the same as -+ * corresponding key type constants. 
-+ */ -+enum { -+ UBIFS_INO_NODE, -+ UBIFS_DATA_NODE, -+ UBIFS_DENT_NODE, -+ UBIFS_XENT_NODE, -+ UBIFS_TRUN_NODE, -+ UBIFS_PAD_NODE, -+ UBIFS_SB_NODE, -+ UBIFS_MST_NODE, -+ UBIFS_REF_NODE, -+ UBIFS_IDX_NODE, -+ UBIFS_CS_NODE, -+ UBIFS_ORPH_NODE, -+ UBIFS_NODE_TYPES_CNT, -+}; -+ -+/* -+ * Master node flags. -+ * -+ * UBIFS_MST_DIRTY: rebooted uncleanly - master node is dirty -+ * UBIFS_MST_NO_ORPHS: no orphan inodes present -+ * UBIFS_MST_RCVRY: written by recovery -+ */ -+enum { -+ UBIFS_MST_DIRTY = 1, -+ UBIFS_MST_NO_ORPHS = 2, -+ UBIFS_MST_RCVRY = 4, -+}; -+ -+/* -+ * Node group type (used by recovery to recover whole group or none). -+ * -+ * UBIFS_NO_NODE_GROUP: this node is not part of a group -+ * UBIFS_IN_NODE_GROUP: this node is a part of a group -+ * UBIFS_LAST_OF_NODE_GROUP: this node is the last in a group -+ */ -+enum { -+ UBIFS_NO_NODE_GROUP = 0, -+ UBIFS_IN_NODE_GROUP, -+ UBIFS_LAST_OF_NODE_GROUP, -+}; -+ -+/* -+ * Superblock flags. -+ * -+ * UBIFS_FLG_BIGLPT: if "big" LPT model is used if set -+ */ -+enum { -+ UBIFS_FLG_BIGLPT = 0x02, -+}; -+ -+/** -+ * struct ubifs_ch - common header node. -+ * @magic: UBIFS node magic number (%UBIFS_NODE_MAGIC) -+ * @crc: CRC-32 checksum of the node header -+ * @sqnum: sequence number -+ * @len: full node length -+ * @node_type: node type -+ * @group_type: node group type -+ * @padding: reserved for future, zeroes -+ * -+ * Every UBIFS node starts with this common part. If the node has a key, the -+ * key always goes next. -+ */ -+struct ubifs_ch { -+ __le32 magic; -+ __le32 crc; -+ __le64 sqnum; -+ __le32 len; -+ __u8 node_type; -+ __u8 group_type; -+ __u8 padding[2]; -+} __attribute__ ((packed)); -+ -+/** -+ * union ubifs_dev_desc - device node descriptor. -+ * @new: new type device descriptor -+ * @huge: huge type device descriptor -+ * -+ * This data structure describes major/minor numbers of a device node. In an -+ * inode is a device node then its data contains an object of this type. 
UBIFS -+ * uses standard Linux "new" and "huge" device node encodings. -+ */ -+union ubifs_dev_desc { -+ __le32 new; -+ __le64 huge; -+} __attribute__ ((packed)); -+ -+/** -+ * struct ubifs_ino_node - inode node. -+ * @ch: common header -+ * @key: node key -+ * @creat_sqnum: sequence number at time of creation -+ * @size: inode size in bytes (amount of uncompressed data) -+ * @atime_sec: access time seconds -+ * @ctime_sec: creation time seconds -+ * @mtime_sec: modification time seconds -+ * @atime_nsec: access time nanoseconds -+ * @ctime_nsec: creation time nanoseconds -+ * @mtime_nsec: modification time nanoseconds -+ * @nlink: number of hard links -+ * @uid: owner ID -+ * @gid: group ID -+ * @mode: access flags -+ * @flags: per-inode flags (%UBIFS_COMPR_FL, %UBIFS_SYNC_FL, etc) -+ * @data_len: inode data length -+ * @xattr_cnt: count of extended attributes this inode has -+ * @xattr_size: summarized size of all extended attributes in bytes -+ * @padding1: reserved for future, zeroes -+ * @xattr_names: sum of lengths of all extended attribute names belonging to -+ * this inode -+ * @compr_type: compression type used for this inode -+ * @padding2: reserved for future, zeroes -+ * @data: data attached to the inode -+ * -+ * Note, even though inode compression type is defined by @compr_type, some -+ * nodes of this inode may be compressed with different compressor - this -+ * happens if compression type is changed while the inode already has data -+ * nodes. But @compr_type will be use for further writes to the inode. -+ * -+ * Note, do not forget to amend 'zero_ino_node_unused()' function when changing -+ * the padding fields. 
-+ */ -+struct ubifs_ino_node { -+ struct ubifs_ch ch; -+ __u8 key[UBIFS_MAX_KEY_LEN]; -+ __le64 creat_sqnum; -+ __le64 size; -+ __le64 atime_sec; -+ __le64 ctime_sec; -+ __le64 mtime_sec; -+ __le32 atime_nsec; -+ __le32 ctime_nsec; -+ __le32 mtime_nsec; -+ __le32 nlink; -+ __le32 uid; -+ __le32 gid; -+ __le32 mode; -+ __le32 flags; -+ __le32 data_len; -+ __le32 xattr_cnt; -+ __le32 xattr_size; -+ __u8 padding1[4]; /* Watch 'zero_ino_node_unused()' if changing! */ -+ __le32 xattr_names; -+ __le16 compr_type; -+ __u8 padding2[26]; /* Watch 'zero_ino_node_unused()' if changing! */ -+ __u8 data[]; -+} __attribute__ ((packed)); -+ -+/** -+ * struct ubifs_dent_node - directory entry node. -+ * @ch: common header -+ * @key: node key -+ * @inum: target inode number -+ * @padding1: reserved for future, zeroes -+ * @type: type of the target inode (%UBIFS_ITYPE_REG, %UBIFS_ITYPE_DIR, etc) -+ * @nlen: name length -+ * @padding2: reserved for future, zeroes -+ * @name: zero-terminated name -+ * -+ * Note, do not forget to amend 'zero_dent_node_unused()' function when -+ * changing the padding fields. -+ */ -+struct ubifs_dent_node { -+ struct ubifs_ch ch; -+ __u8 key[UBIFS_MAX_KEY_LEN]; -+ __le64 inum; -+ __u8 padding1; -+ __u8 type; -+ __le16 nlen; -+ __u8 padding2[4]; /* Watch 'zero_dent_node_unused()' if changing! */ -+ __u8 name[]; -+} __attribute__ ((packed)); -+ -+/** -+ * struct ubifs_data_node - data node. -+ * @ch: common header -+ * @key: node key -+ * @size: uncompressed data size in bytes -+ * @compr_type: compression type (%UBIFS_COMPR_NONE, %UBIFS_COMPR_LZO, etc) -+ * @padding: reserved for future, zeroes -+ * @data: data -+ * -+ * Note, do not forget to amend 'zero_data_node_unused()' function when -+ * changing the padding fields. -+ */ -+struct ubifs_data_node { -+ struct ubifs_ch ch; -+ __u8 key[UBIFS_MAX_KEY_LEN]; -+ __le32 size; -+ __le16 compr_type; -+ __u8 padding[2]; /* Watch 'zero_data_node_unused()' if changing! 
*/ -+ __u8 data[]; -+} __attribute__ ((packed)); -+ -+/** -+ * struct ubifs_trun_node - truncation node. -+ * @ch: common header -+ * @inum: truncated inode number -+ * @padding: reserved for future, zeroes -+ * @old_size: size before truncation -+ * @new_size: size after truncation -+ * -+ * This node exists only in the journal and never goes to the main area. Note, -+ * do not forget to amend 'zero_trun_node_unused()' function when changing the -+ * padding fields. -+ */ -+struct ubifs_trun_node { -+ struct ubifs_ch ch; -+ __le32 inum; -+ __u8 padding[12]; /* Watch 'zero_trun_node_unused()' if changing! */ -+ __le64 old_size; -+ __le64 new_size; -+} __attribute__ ((packed)); -+ -+/** -+ * struct ubifs_pad_node - padding node. -+ * @ch: common header -+ * @pad_len: how many bytes after this node are unused (because padded) -+ * @padding: reserved for future, zeroes -+ */ -+struct ubifs_pad_node { -+ struct ubifs_ch ch; -+ __le32 pad_len; -+} __attribute__ ((packed)); -+ -+/** -+ * struct ubifs_sb_node - superblock node. -+ * @ch: common header -+ * @padding: reserved for future, zeroes -+ * @key_hash: type of hash function used in keys -+ * @key_fmt: format of the key -+ * @flags: file-system flags (%UBIFS_FLG_BIGLPT, etc) -+ * @min_io_size: minimal input/output unit size -+ * @leb_size: logical eraseblock size in bytes -+ * @leb_cnt: count of LEBs used by file-system -+ * @max_leb_cnt: maximum count of LEBs used by file-system -+ * @max_bud_bytes: maximum amount of data stored in buds -+ * @log_lebs: log size in logical eraseblocks -+ * @lpt_lebs: number of LEBs used for lprops table -+ * @orph_lebs: number of LEBs used for recording orphans -+ * @jhead_cnt: count of journal heads -+ * @fanout: tree fanout (max. 
number of links per indexing node) -+ * @lsave_cnt: number of LEB numbers in LPT's save table -+ * @fmt_version: UBIFS on-flash format version -+ * @default_compr: default compression -+ * @padding1: reserved for future, zeroes -+ * @rp_uid: reserve pool UID -+ * @rp_gid: reserve pool GID -+ * @rp_size: size of the reserved pool in bytes -+ * @padding2: reserved for future, zeroes -+ * @time_gran: time granularity in nanoseconds -+ * @uuid: UUID generated when the file system image was created -+ */ -+struct ubifs_sb_node { -+ struct ubifs_ch ch; -+ __u8 padding[2]; -+ __u8 key_hash; -+ __u8 key_fmt; -+ __le32 flags; -+ __le32 min_io_size; -+ __le32 leb_size; -+ __le32 leb_cnt; -+ __le32 max_leb_cnt; -+ __le64 max_bud_bytes; -+ __le32 log_lebs; -+ __le32 lpt_lebs; -+ __le32 orph_lebs; -+ __le32 jhead_cnt; -+ __le32 fanout; -+ __le32 lsave_cnt; -+ __le32 fmt_version; -+ __le16 default_compr; -+ __u8 padding1[2]; -+ __le32 rp_uid; -+ __le32 rp_gid; -+ __le64 rp_size; -+ __le32 time_gran; -+ __u8 uuid[16]; -+ __u8 padding2[3972]; -+} __attribute__ ((packed)); -+ -+/** -+ * struct ubifs_mst_node - master node. 
-+ * @ch: common header -+ * @highest_inum: highest inode number in the committed index -+ * @cmt_no: commit number -+ * @flags: various flags (%UBIFS_MST_DIRTY, etc) -+ * @log_lnum: start of the log -+ * @root_lnum: LEB number of the root indexing node -+ * @root_offs: offset within @root_lnum -+ * @root_len: root indexing node length -+ * @gc_lnum: LEB reserved for garbage collection (%-1 value means the LEB was -+ * not reserved and should be reserved on mount) -+ * @ihead_lnum: LEB number of index head -+ * @ihead_offs: offset of index head -+ * @index_size: size of index on flash -+ * @total_free: total free space in bytes -+ * @total_dirty: total dirty space in bytes -+ * @total_used: total used space in bytes (includes only data LEBs) -+ * @total_dead: total dead space in bytes (includes only data LEBs) -+ * @total_dark: total dark space in bytes (includes only data LEBs) -+ * @lpt_lnum: LEB number of LPT root nnode -+ * @lpt_offs: offset of LPT root nnode -+ * @nhead_lnum: LEB number of LPT head -+ * @nhead_offs: offset of LPT head -+ * @ltab_lnum: LEB number of LPT's own lprops table -+ * @ltab_offs: offset of LPT's own lprops table -+ * @lsave_lnum: LEB number of LPT's save table (big model only) -+ * @lsave_offs: offset of LPT's save table (big model only) -+ * @lscan_lnum: LEB number of last LPT scan -+ * @empty_lebs: number of empty logical eraseblocks -+ * @idx_lebs: number of indexing logical eraseblocks -+ * @leb_cnt: count of LEBs used by file-system -+ * @padding: reserved for future, zeroes -+ */ -+struct ubifs_mst_node { -+ struct ubifs_ch ch; -+ __le64 highest_inum; -+ __le64 cmt_no; -+ __le32 flags; -+ __le32 log_lnum; -+ __le32 root_lnum; -+ __le32 root_offs; -+ __le32 root_len; -+ __le32 gc_lnum; -+ __le32 ihead_lnum; -+ __le32 ihead_offs; -+ __le64 index_size; -+ __le64 total_free; -+ __le64 total_dirty; -+ __le64 total_used; -+ __le64 total_dead; -+ __le64 total_dark; -+ __le32 lpt_lnum; -+ __le32 lpt_offs; -+ __le32 nhead_lnum; -+ __le32 
nhead_offs; -+ __le32 ltab_lnum; -+ __le32 ltab_offs; -+ __le32 lsave_lnum; -+ __le32 lsave_offs; -+ __le32 lscan_lnum; -+ __le32 empty_lebs; -+ __le32 idx_lebs; -+ __le32 leb_cnt; -+ __u8 padding[344]; -+} __attribute__ ((packed)); -+ -+/** -+ * struct ubifs_ref_node - logical eraseblock reference node. -+ * @ch: common header -+ * @lnum: the referred logical eraseblock number -+ * @offs: start offset in the referred LEB -+ * @jhead: journal head number -+ * @padding: reserved for future, zeroes -+ */ -+struct ubifs_ref_node { -+ struct ubifs_ch ch; -+ __le32 lnum; -+ __le32 offs; -+ __le32 jhead; -+ __u8 padding[28]; -+} __attribute__ ((packed)); -+ -+/** -+ * struct ubifs_branch - key/reference/length branch -+ * @lnum: LEB number of the target node -+ * @offs: offset within @lnum -+ * @len: target node length -+ * @key: key -+ */ -+struct ubifs_branch { -+ __le32 lnum; -+ __le32 offs; -+ __le32 len; -+ __u8 key[]; -+} __attribute__ ((packed)); -+ -+/** -+ * struct ubifs_idx_node - indexing node. -+ * @ch: common header -+ * @child_cnt: number of child index nodes -+ * @level: tree level -+ * @branches: LEB number / offset / length / key branches -+ */ -+struct ubifs_idx_node { -+ struct ubifs_ch ch; -+ __le16 child_cnt; -+ __le16 level; -+ __u8 branches[]; -+} __attribute__ ((packed)); -+ -+/** -+ * struct ubifs_cs_node - commit start node. -+ * @ch: common header -+ * @cmt_no: commit number -+ */ -+struct ubifs_cs_node { -+ struct ubifs_ch ch; -+ __le64 cmt_no; -+} __attribute__ ((packed)); -+ -+/** -+ * struct ubifs_orph_node - orphan node. 
-+ * @ch: common header -+ * @cmt_no: commit number (also top bit is set on the last node of the commit) -+ * @inos: inode numbers of orphans -+ */ -+struct ubifs_orph_node { -+ struct ubifs_ch ch; -+ __le64 cmt_no; -+ __le64 inos[]; -+} __attribute__ ((packed)); -+ -+#endif /* __UBIFS_MEDIA_H__ */ -diff -Nurd linux-2.6.24.orig/fs/ubifs/ubifs.h linux-2.6.24/fs/ubifs/ubifs.h ---- linux-2.6.24.orig/fs/ubifs/ubifs.h 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.24/fs/ubifs/ubifs.h 2009-04-17 09:49:28.000000000 +0200 -@@ -0,0 +1,1722 @@ -+/* -+ * This file is part of UBIFS. -+ * -+ * Copyright (C) 2006-2008 Nokia Corporation -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 as published by -+ * the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -+ * more details. -+ * -+ * You should have received a copy of the GNU General Public License along with -+ * this program; if not, write to the Free Software Foundation, Inc., 51 -+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ * -+ * Authors: Artem Bityutskiy (Битюцкий Артём) -+ * Adrian Hunter -+ */ -+ -+#ifndef __UBIFS_H__ -+#define __UBIFS_H__ -+ -+#include <asm/div64.h> -+#include <linux/statfs.h> -+#include <linux/fs.h> -+#include <linux/err.h> -+#include <linux/sched.h> -+#include <linux/vmalloc.h> -+#include <linux/spinlock.h> -+#include <linux/mutex.h> -+#include <linux/rwsem.h> -+#include <linux/mtd/ubi.h> -+#include <linux/pagemap.h> -+#include <linux/backing-dev.h> -+#include "ubifs-media.h" -+ -+/* Version of this UBIFS implementation */ -+#define UBIFS_VERSION 1 -+ -+/* Normal UBIFS messages */ -+#define ubifs_msg(fmt, ...) 
\ -+ printk(KERN_NOTICE "UBIFS: " fmt "\n", ##__VA_ARGS__) -+/* UBIFS error messages */ -+#define ubifs_err(fmt, ...) \ -+ printk(KERN_ERR "UBIFS error (pid %d): %s: " fmt "\n", current->pid, \ -+ __func__, ##__VA_ARGS__) -+/* UBIFS warning messages */ -+#define ubifs_warn(fmt, ...) \ -+ printk(KERN_WARNING "UBIFS warning (pid %d): %s: " fmt "\n", \ -+ current->pid, __func__, ##__VA_ARGS__) -+ -+/* UBIFS file system VFS magic number */ -+#define UBIFS_SUPER_MAGIC 0x24051905 -+ -+/* Number of UBIFS blocks per VFS page */ -+#define UBIFS_BLOCKS_PER_PAGE (PAGE_CACHE_SIZE / UBIFS_BLOCK_SIZE) -+#define UBIFS_BLOCKS_PER_PAGE_SHIFT (PAGE_CACHE_SHIFT - UBIFS_BLOCK_SHIFT) -+ -+/* "File system end of life" sequence number watermark */ -+#define SQNUM_WARN_WATERMARK 0xFFFFFFFF00000000ULL -+#define SQNUM_WATERMARK 0xFFFFFFFFFF000000ULL -+ -+/* -+ * Minimum amount of LEBs reserved for the index. At present the index needs at -+ * least 2 LEBs: one for the index head and one for in-the-gaps method (which -+ * currently does not cater for the index head and so excludes it from -+ * consideration). -+ */ -+#define MIN_INDEX_LEBS 2 -+ -+/* Minimum amount of data UBIFS writes to the flash */ -+#define MIN_WRITE_SZ (UBIFS_DATA_NODE_SZ + 8) -+ -+/* -+ * Currently we do not support inode number overlapping and re-using, so this -+ * watermark defines dangerous inode number level. This should be fixed later, -+ * although it is difficult to exceed current limit. Another option is to use -+ * 64-bit inode numbers, but this means more overhead. -+ */ -+#define INUM_WARN_WATERMARK 0xFFF00000 -+#define INUM_WATERMARK 0xFFFFFF00 -+ -+/* Largest key size supported in this implementation */ -+#define CUR_MAX_KEY_LEN UBIFS_SK_LEN -+ -+/* Maximum number of entries in each LPT (LEB category) heap */ -+#define LPT_HEAP_SZ 256 -+ -+/* -+ * Background thread name pattern. The numbers are UBI device and volume -+ * numbers. 
-+ */ -+#define BGT_NAME_PATTERN "ubifs_bgt%d_%d" -+ -+/* Default write-buffer synchronization timeout (5 secs) */ -+#define DEFAULT_WBUF_TIMEOUT (5 * HZ) -+ -+/* Maximum possible inode number (only 32-bit inodes are supported now) */ -+#define MAX_INUM 0xFFFFFFFF -+ -+/* Number of non-data journal heads */ -+#define NONDATA_JHEADS_CNT 2 -+ -+/* Garbage collector head */ -+#define GCHD 0 -+/* Base journal head number */ -+#define BASEHD 1 -+/* First "general purpose" journal head */ -+#define DATAHD 2 -+ -+/* 'No change' value for 'ubifs_change_lp()' */ -+#define LPROPS_NC 0x80000001 -+ -+/* -+ * There is no notion of truncation key because truncation nodes do not exist -+ * in TNC. However, when replaying, it is handy to introduce fake "truncation" -+ * keys for truncation nodes because the code becomes simpler. So we define -+ * %UBIFS_TRUN_KEY type. -+ */ -+#define UBIFS_TRUN_KEY UBIFS_KEY_TYPES_CNT -+ -+/* -+ * How much a directory entry/extended attribute entry adds to the parent/host -+ * inode. -+ */ -+#define CALC_DENT_SIZE(name_len) ALIGN(UBIFS_DENT_NODE_SZ + (name_len) + 1, 8) -+ -+/* How much an extended attribute adds to the host inode */ -+#define CALC_XATTR_BYTES(data_len) ALIGN(UBIFS_INO_NODE_SZ + (data_len) + 1, 8) -+ -+/* -+ * Znodes which were not touched for 'OLD_ZNODE_AGE' seconds are considered -+ * "old", and znode which were touched last 'YOUNG_ZNODE_AGE' seconds ago are -+ * considered "young". This is used by shrinker when selecting znode to trim -+ * off. -+ */ -+#define OLD_ZNODE_AGE 20 -+#define YOUNG_ZNODE_AGE 5 -+ -+/* -+ * Some compressors, like LZO, may end up with more data than the input buffer. -+ * So UBIFS always allocates larger output buffer, to be sure the compressor -+ * will not corrupt memory in case of worst case compression. 
-+ */ -+#define WORST_COMPR_FACTOR 2 -+ -+/* Maximum expected tree height for use by bottom_up_buf */ -+#define BOTTOM_UP_HEIGHT 64 -+ -+/* Maximum number of data nodes to bulk-read */ -+#define UBIFS_MAX_BULK_READ 32 -+ -+/* -+ * Lockdep classes for UBIFS inode @ui_mutex. -+ */ -+enum { -+ WB_MUTEX_1 = 0, -+ WB_MUTEX_2 = 1, -+ WB_MUTEX_3 = 2, -+}; -+ -+/* -+ * Znode flags (actually, bit numbers which store the flags). -+ * -+ * DIRTY_ZNODE: znode is dirty -+ * COW_ZNODE: znode is being committed and a new instance of this znode has to -+ * be created before changing this znode -+ * OBSOLETE_ZNODE: znode is obsolete, which means it was deleted, but it is -+ * still in the commit list and the ongoing commit operation -+ * will commit it, and delete this znode after it is done -+ */ -+enum { -+ DIRTY_ZNODE = 0, -+ COW_ZNODE = 1, -+ OBSOLETE_ZNODE = 2, -+}; -+ -+/* -+ * Commit states. -+ * -+ * COMMIT_RESTING: commit is not wanted -+ * COMMIT_BACKGROUND: background commit has been requested -+ * COMMIT_REQUIRED: commit is required -+ * COMMIT_RUNNING_BACKGROUND: background commit is running -+ * COMMIT_RUNNING_REQUIRED: commit is running and it is required -+ * COMMIT_BROKEN: commit failed -+ */ -+enum { -+ COMMIT_RESTING = 0, -+ COMMIT_BACKGROUND, -+ COMMIT_REQUIRED, -+ COMMIT_RUNNING_BACKGROUND, -+ COMMIT_RUNNING_REQUIRED, -+ COMMIT_BROKEN, -+}; -+ -+/* -+ * 'ubifs_scan_a_node()' return values. -+ * -+ * SCANNED_GARBAGE: scanned garbage -+ * SCANNED_EMPTY_SPACE: scanned empty space -+ * SCANNED_A_NODE: scanned a valid node -+ * SCANNED_A_CORRUPT_NODE: scanned a corrupted node -+ * SCANNED_A_BAD_PAD_NODE: scanned a padding node with invalid pad length -+ * -+ * Greater than zero means: 'scanned that number of padding bytes' -+ */ -+enum { -+ SCANNED_GARBAGE = 0, -+ SCANNED_EMPTY_SPACE = -1, -+ SCANNED_A_NODE = -2, -+ SCANNED_A_CORRUPT_NODE = -3, -+ SCANNED_A_BAD_PAD_NODE = -4, -+}; -+ -+/* -+ * LPT cnode flag bits. 
-+ * -+ * DIRTY_CNODE: cnode is dirty -+ * COW_CNODE: cnode is being committed and must be copied before writing -+ * OBSOLETE_CNODE: cnode is being committed and has been copied (or deleted), -+ * so it can (and must) be freed when the commit is finished -+ */ -+enum { -+ DIRTY_CNODE = 0, -+ COW_CNODE = 1, -+ OBSOLETE_CNODE = 2, -+}; -+ -+/* -+ * Dirty flag bits (lpt_drty_flgs) for LPT special nodes. -+ * -+ * LTAB_DIRTY: ltab node is dirty -+ * LSAVE_DIRTY: lsave node is dirty -+ */ -+enum { -+ LTAB_DIRTY = 1, -+ LSAVE_DIRTY = 2, -+}; -+ -+/* -+ * Return codes used by the garbage collector. -+ * @LEB_FREED: the logical eraseblock was freed and is ready to use -+ * @LEB_FREED_IDX: indexing LEB was freed and can be used only after the commit -+ * @LEB_RETAINED: the logical eraseblock was freed and retained for GC purposes -+ */ -+enum { -+ LEB_FREED, -+ LEB_FREED_IDX, -+ LEB_RETAINED, -+}; -+ -+/** -+ * struct ubifs_old_idx - index node obsoleted since last commit start. -+ * @rb: rb-tree node -+ * @lnum: LEB number of obsoleted index node -+ * @offs: offset of obsoleted index node -+ */ -+struct ubifs_old_idx { -+ struct rb_node rb; -+ int lnum; -+ int offs; -+}; -+ -+/* The below union makes it easier to deal with keys */ -+union ubifs_key { -+ uint8_t u8[CUR_MAX_KEY_LEN]; -+ uint32_t u32[CUR_MAX_KEY_LEN/4]; -+ uint64_t u64[CUR_MAX_KEY_LEN/8]; -+ __le32 j32[CUR_MAX_KEY_LEN/4]; -+}; -+ -+/** -+ * struct ubifs_scan_node - UBIFS scanned node information. -+ * @list: list of scanned nodes -+ * @key: key of node scanned (if it has one) -+ * @sqnum: sequence number -+ * @type: type of node scanned -+ * @offs: offset with LEB of node scanned -+ * @len: length of node scanned -+ * @node: raw node -+ */ -+struct ubifs_scan_node { -+ struct list_head list; -+ union ubifs_key key; -+ unsigned long long sqnum; -+ int type; -+ int offs; -+ int len; -+ void *node; -+}; -+ -+/** -+ * struct ubifs_scan_leb - UBIFS scanned LEB information. 
-+ * @lnum: logical eraseblock number -+ * @nodes_cnt: number of nodes scanned -+ * @nodes: list of struct ubifs_scan_node -+ * @endpt: end point (and therefore the start of empty space) -+ * @ecc: read returned -EBADMSG -+ * @buf: buffer containing entire LEB scanned -+ */ -+struct ubifs_scan_leb { -+ int lnum; -+ int nodes_cnt; -+ struct list_head nodes; -+ int endpt; -+ int ecc; -+ void *buf; -+}; -+ -+/** -+ * struct ubifs_gced_idx_leb - garbage-collected indexing LEB. -+ * @list: list -+ * @lnum: LEB number -+ * @unmap: OK to unmap this LEB -+ * -+ * This data structure is used to temporary store garbage-collected indexing -+ * LEBs - they are not released immediately, but only after the next commit. -+ * This is needed to guarantee recoverability. -+ */ -+struct ubifs_gced_idx_leb { -+ struct list_head list; -+ int lnum; -+ int unmap; -+}; -+ -+/** -+ * struct ubifs_inode - UBIFS in-memory inode description. -+ * @vfs_inode: VFS inode description object -+ * @creat_sqnum: sequence number at time of creation -+ * @del_cmtno: commit number corresponding to the time the inode was deleted, -+ * protected by @c->commit_sem; -+ * @xattr_size: summarized size of all extended attributes in bytes -+ * @xattr_cnt: count of extended attributes this inode has -+ * @xattr_names: sum of lengths of all extended attribute names belonging to -+ * this inode -+ * @dirty: non-zero if the inode is dirty -+ * @xattr: non-zero if this is an extended attribute inode -+ * @bulk_read: non-zero if bulk-read should be used -+ * @ui_mutex: serializes inode write-back with the rest of VFS operations, -+ * serializes "clean <-> dirty" state changes, serializes bulk-read, -+ * protects @dirty, @bulk_read, @ui_size, and @xattr_size -+ * @ui_lock: protects @synced_i_size -+ * @synced_i_size: synchronized size of inode, i.e. 
the value of inode size -+ * currently stored on the flash; used only for regular file -+ * inodes -+ * @ui_size: inode size used by UBIFS when writing to flash -+ * @flags: inode flags (@UBIFS_COMPR_FL, etc) -+ * @compr_type: default compression type used for this inode -+ * @last_page_read: page number of last page read (for bulk read) -+ * @read_in_a_row: number of consecutive pages read in a row (for bulk read) -+ * @data_len: length of the data attached to the inode -+ * @data: inode's data -+ * -+ * @ui_mutex exists for two main reasons. At first it prevents inodes from -+ * being written back while UBIFS changing them, being in the middle of an VFS -+ * operation. This way UBIFS makes sure the inode fields are consistent. For -+ * example, in 'ubifs_rename()' we change 3 inodes simultaneously, and -+ * write-back must not write any of them before we have finished. -+ * -+ * The second reason is budgeting - UBIFS has to budget all operations. If an -+ * operation is going to mark an inode dirty, it has to allocate budget for -+ * this. It cannot just mark it dirty because there is no guarantee there will -+ * be enough flash space to write the inode back later. This means UBIFS has -+ * to have full control over inode "clean <-> dirty" transitions (and pages -+ * actually). But unfortunately, VFS marks inodes dirty in many places, and it -+ * does not ask the file-system if it is allowed to do so (there is a notifier, -+ * but it is not enough), i.e., there is no mechanism to synchronize with this. -+ * So UBIFS has its own inode dirty flag and its own mutex to serialize -+ * "clean <-> dirty" transitions. -+ * -+ * The @synced_i_size field is used to make sure we never write pages which are -+ * beyond last synchronized inode size. See 'ubifs_writepage()' for more -+ * information. -+ * -+ * The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses -+ * @ui_size instead of @inode->i_size. 
The reason for this is that UBIFS cannot -+ * make sure @inode->i_size is always changed under @ui_mutex, because it -+ * cannot call 'vmtruncate()' with @ui_mutex locked, because it would deadlock -+ * with 'ubifs_writepage()' (see file.c). All the other inode fields are -+ * changed under @ui_mutex, so they do not need "shadow" fields. Note, one -+ * could consider to rework locking and base it on "shadow" fields. -+ */ -+struct ubifs_inode { -+ struct inode vfs_inode; -+ unsigned long long creat_sqnum; -+ unsigned long long del_cmtno; -+ unsigned int xattr_size; -+ unsigned int xattr_cnt; -+ unsigned int xattr_names; -+ unsigned int dirty:1; -+ unsigned int xattr:1; -+ unsigned int bulk_read:1; -+ unsigned int compr_type:2; -+ struct mutex ui_mutex; -+ spinlock_t ui_lock; -+ loff_t synced_i_size; -+ loff_t ui_size; -+ int flags; -+ pgoff_t last_page_read; -+ pgoff_t read_in_a_row; -+ int data_len; -+ void *data; -+}; -+ -+/** -+ * struct ubifs_unclean_leb - records a LEB recovered under read-only mode. -+ * @list: list -+ * @lnum: LEB number of recovered LEB -+ * @endpt: offset where recovery ended -+ * -+ * This structure records a LEB identified during recovery that needs to be -+ * cleaned but was not because UBIFS was mounted read-only. The information -+ * is used to clean the LEB when remounting to read-write mode. -+ */ -+struct ubifs_unclean_leb { -+ struct list_head list; -+ int lnum; -+ int endpt; -+}; -+ -+/* -+ * LEB properties flags. 
-+ * -+ * LPROPS_UNCAT: not categorized -+ * LPROPS_DIRTY: dirty > free, dirty >= @c->dead_wm, not index -+ * LPROPS_DIRTY_IDX: dirty + free > @c->min_idx_node_sz and index -+ * LPROPS_FREE: free > 0, dirty < @c->dead_wm, not empty, not index -+ * LPROPS_HEAP_CNT: number of heaps used for storing categorized LEBs -+ * LPROPS_EMPTY: LEB is empty, not taken -+ * LPROPS_FREEABLE: free + dirty == leb_size, not index, not taken -+ * LPROPS_FRDI_IDX: free + dirty == leb_size and index, may be taken -+ * LPROPS_CAT_MASK: mask for the LEB categories above -+ * LPROPS_TAKEN: LEB was taken (this flag is not saved on the media) -+ * LPROPS_INDEX: LEB contains indexing nodes (this flag also exists on flash) -+ */ -+enum { -+ LPROPS_UNCAT = 0, -+ LPROPS_DIRTY = 1, -+ LPROPS_DIRTY_IDX = 2, -+ LPROPS_FREE = 3, -+ LPROPS_HEAP_CNT = 3, -+ LPROPS_EMPTY = 4, -+ LPROPS_FREEABLE = 5, -+ LPROPS_FRDI_IDX = 6, -+ LPROPS_CAT_MASK = 15, -+ LPROPS_TAKEN = 16, -+ LPROPS_INDEX = 32, -+}; -+ -+/** -+ * struct ubifs_lprops - logical eraseblock properties. -+ * @free: amount of free space in bytes -+ * @dirty: amount of dirty space in bytes -+ * @flags: LEB properties flags (see above) -+ * @lnum: LEB number -+ * @list: list of same-category lprops (for LPROPS_EMPTY and LPROPS_FREEABLE) -+ * @hpos: heap position in heap of same-category lprops (other categories) -+ */ -+struct ubifs_lprops { -+ int free; -+ int dirty; -+ int flags; -+ int lnum; -+ union { -+ struct list_head list; -+ int hpos; -+ }; -+}; -+ -+/** -+ * struct ubifs_lpt_lprops - LPT logical eraseblock properties. -+ * @free: amount of free space in bytes -+ * @dirty: amount of dirty space in bytes -+ * @tgc: trivial GC flag (1 => unmap after commit end) -+ * @cmt: commit flag (1 => reserved for commit) -+ */ -+struct ubifs_lpt_lprops { -+ int free; -+ int dirty; -+ unsigned tgc:1; -+ unsigned cmt:1; -+}; -+ -+/** -+ * struct ubifs_lp_stats - statistics of eraseblocks in the main area. 
-+ * @empty_lebs: number of empty LEBs -+ * @taken_empty_lebs: number of taken LEBs -+ * @idx_lebs: number of indexing LEBs -+ * @total_free: total free space in bytes (includes all LEBs) -+ * @total_dirty: total dirty space in bytes (includes all LEBs) -+ * @total_used: total used space in bytes (does not include index LEBs) -+ * @total_dead: total dead space in bytes (does not include index LEBs) -+ * @total_dark: total dark space in bytes (does not include index LEBs) -+ * -+ * The @taken_empty_lebs field counts the LEBs that are in the transient state -+ * of having been "taken" for use but not yet written to. @taken_empty_lebs is -+ * needed to account correctly for @gc_lnum, otherwise @empty_lebs could be -+ * used by itself (in which case 'unused_lebs' would be a better name). In the -+ * case of @gc_lnum, it is "taken" at mount time or whenever a LEB is retained -+ * by GC, but unlike other empty LEBs that are "taken", it may not be written -+ * straight away (i.e. before the next commit start or unmount), so either -+ * @gc_lnum must be specially accounted for, or the current approach followed -+ * i.e. count it under @taken_empty_lebs. -+ * -+ * @empty_lebs includes @taken_empty_lebs. -+ * -+ * @total_used, @total_dead and @total_dark fields do not account indexing -+ * LEBs. -+ */ -+struct ubifs_lp_stats { -+ int empty_lebs; -+ int taken_empty_lebs; -+ int idx_lebs; -+ long long total_free; -+ long long total_dirty; -+ long long total_used; -+ long long total_dead; -+ long long total_dark; -+}; -+ -+struct ubifs_nnode; -+ -+/** -+ * struct ubifs_cnode - LEB Properties Tree common node. 
-+ * @parent: parent nnode -+ * @cnext: next cnode to commit -+ * @flags: flags (%DIRTY_LPT_NODE or %OBSOLETE_LPT_NODE) -+ * @iip: index in parent -+ * @level: level in the tree (zero for pnodes, greater than zero for nnodes) -+ * @num: node number -+ */ -+struct ubifs_cnode { -+ struct ubifs_nnode *parent; -+ struct ubifs_cnode *cnext; -+ unsigned long flags; -+ int iip; -+ int level; -+ int num; -+}; -+ -+/** -+ * struct ubifs_pnode - LEB Properties Tree leaf node. -+ * @parent: parent nnode -+ * @cnext: next cnode to commit -+ * @flags: flags (%DIRTY_LPT_NODE or %OBSOLETE_LPT_NODE) -+ * @iip: index in parent -+ * @level: level in the tree (always zero for pnodes) -+ * @num: node number -+ * @lprops: LEB properties array -+ */ -+struct ubifs_pnode { -+ struct ubifs_nnode *parent; -+ struct ubifs_cnode *cnext; -+ unsigned long flags; -+ int iip; -+ int level; -+ int num; -+ struct ubifs_lprops lprops[UBIFS_LPT_FANOUT]; -+}; -+ -+/** -+ * struct ubifs_nbranch - LEB Properties Tree internal node branch. -+ * @lnum: LEB number of child -+ * @offs: offset of child -+ * @nnode: nnode child -+ * @pnode: pnode child -+ * @cnode: cnode child -+ */ -+struct ubifs_nbranch { -+ int lnum; -+ int offs; -+ union { -+ struct ubifs_nnode *nnode; -+ struct ubifs_pnode *pnode; -+ struct ubifs_cnode *cnode; -+ }; -+}; -+ -+/** -+ * struct ubifs_nnode - LEB Properties Tree internal node. -+ * @parent: parent nnode -+ * @cnext: next cnode to commit -+ * @flags: flags (%DIRTY_LPT_NODE or %OBSOLETE_LPT_NODE) -+ * @iip: index in parent -+ * @level: level in the tree (always greater than zero for nnodes) -+ * @num: node number -+ * @nbranch: branches to child nodes -+ */ -+struct ubifs_nnode { -+ struct ubifs_nnode *parent; -+ struct ubifs_cnode *cnext; -+ unsigned long flags; -+ int iip; -+ int level; -+ int num; -+ struct ubifs_nbranch nbranch[UBIFS_LPT_FANOUT]; -+}; -+ -+/** -+ * struct ubifs_lpt_heap - heap of categorized lprops. 
-+ * @arr: heap array -+ * @cnt: number in heap -+ * @max_cnt: maximum number allowed in heap -+ * -+ * There are %LPROPS_HEAP_CNT heaps. -+ */ -+struct ubifs_lpt_heap { -+ struct ubifs_lprops **arr; -+ int cnt; -+ int max_cnt; -+}; -+ -+/* -+ * Return codes for LPT scan callback function. -+ * -+ * LPT_SCAN_CONTINUE: continue scanning -+ * LPT_SCAN_ADD: add the LEB properties scanned to the tree in memory -+ * LPT_SCAN_STOP: stop scanning -+ */ -+enum { -+ LPT_SCAN_CONTINUE = 0, -+ LPT_SCAN_ADD = 1, -+ LPT_SCAN_STOP = 2, -+}; -+ -+struct ubifs_info; -+ -+/* Callback used by the 'ubifs_lpt_scan_nolock()' function */ -+typedef int (*ubifs_lpt_scan_callback)(struct ubifs_info *c, -+ const struct ubifs_lprops *lprops, -+ int in_tree, void *data); -+ -+/** -+ * struct ubifs_wbuf - UBIFS write-buffer. -+ * @c: UBIFS file-system description object -+ * @buf: write-buffer (of min. flash I/O unit size) -+ * @lnum: logical eraseblock number the write-buffer points to -+ * @offs: write-buffer offset in this logical eraseblock -+ * @avail: number of bytes available in the write-buffer -+ * @used: number of used bytes in the write-buffer -+ * @dtype: type of data stored in this LEB (%UBI_LONGTERM, %UBI_SHORTTERM, -+ * %UBI_UNKNOWN) -+ * @jhead: journal head the mutex belongs to (note, needed only to shut lockdep -+ * up by 'mutex_lock_nested()). 
-+ * @sync_callback: write-buffer synchronization callback -+ * @io_mutex: serializes write-buffer I/O -+ * @lock: serializes @buf, @lnum, @offs, @avail, @used, @next_ino and @inodes -+ * fields -+ * @timer: write-buffer timer -+ * @timeout: timer expire interval in jiffies -+ * @need_sync: it is set if its timer expired and needs sync -+ * @next_ino: points to the next position of the following inode number -+ * @inodes: stores the inode numbers of the nodes which are in wbuf -+ * -+ * The write-buffer synchronization callback is called when the write-buffer is -+ * synchronized in order to notify how much space was wasted due to -+ * write-buffer padding and how much free space is left in the LEB. -+ * -+ * Note: the fields @buf, @lnum, @offs, @avail and @used can be read under -+ * spin-lock or mutex because they are written under both mutex and spin-lock. -+ * @buf is appended to under mutex but overwritten under both mutex and -+ * spin-lock. Thus the data between @buf and @buf + @used can be read under -+ * spinlock. -+ */ -+struct ubifs_wbuf { -+ struct ubifs_info *c; -+ void *buf; -+ int lnum; -+ int offs; -+ int avail; -+ int used; -+ int dtype; -+ int jhead; -+ int (*sync_callback)(struct ubifs_info *c, int lnum, int free, int pad); -+ struct mutex io_mutex; -+ spinlock_t lock; -+ struct timer_list timer; -+ int timeout; -+ int need_sync; -+ int next_ino; -+ ino_t *inodes; -+}; -+ -+/** -+ * struct ubifs_bud - bud logical eraseblock. -+ * @lnum: logical eraseblock number -+ * @start: where the (uncommitted) bud data starts -+ * @jhead: journal head number this bud belongs to -+ * @list: link in the list buds belonging to the same journal head -+ * @rb: link in the tree of all buds -+ */ -+struct ubifs_bud { -+ int lnum; -+ int start; -+ int jhead; -+ struct list_head list; -+ struct rb_node rb; -+}; -+ -+/** -+ * struct ubifs_jhead - journal head. 
-+ * @wbuf: head's write-buffer -+ * @buds_list: list of bud LEBs belonging to this journal head -+ * -+ * Note, the @buds list is protected by the @c->buds_lock. -+ */ -+struct ubifs_jhead { -+ struct ubifs_wbuf wbuf; -+ struct list_head buds_list; -+}; -+ -+/** -+ * struct ubifs_zbranch - key/coordinate/length branch stored in znodes. -+ * @key: key -+ * @znode: znode address in memory -+ * @lnum: LEB number of the target node (indexing node or data node) -+ * @offs: target node offset within @lnum -+ * @len: target node length -+ */ -+struct ubifs_zbranch { -+ union ubifs_key key; -+ union { -+ struct ubifs_znode *znode; -+ void *leaf; -+ }; -+ int lnum; -+ int offs; -+ int len; -+}; -+ -+/** -+ * struct ubifs_znode - in-memory representation of an indexing node. -+ * @parent: parent znode or NULL if it is the root -+ * @cnext: next znode to commit -+ * @flags: znode flags (%DIRTY_ZNODE, %COW_ZNODE or %OBSOLETE_ZNODE) -+ * @time: last access time (seconds) -+ * @level: level of the entry in the TNC tree -+ * @child_cnt: count of child znodes -+ * @iip: index in parent's zbranch array -+ * @alt: lower bound of key range has altered i.e. child inserted at slot 0 -+ * @lnum: LEB number of the corresponding indexing node -+ * @offs: offset of the corresponding indexing node -+ * @len: length of the corresponding indexing node -+ * @zbranch: array of znode branches (@c->fanout elements) -+ */ -+struct ubifs_znode { -+ struct ubifs_znode *parent; -+ struct ubifs_znode *cnext; -+ unsigned long flags; -+ unsigned long time; -+ int level; -+ int child_cnt; -+ int iip; -+ int alt; -+#ifdef CONFIG_UBIFS_FS_DEBUG -+ int lnum, offs, len; -+#endif -+ struct ubifs_zbranch zbranch[]; -+}; -+ -+/** -+ * struct bu_info - bulk-read information. 
-+ * @key: first data node key -+ * @zbranch: zbranches of data nodes to bulk read -+ * @buf: buffer to read into -+ * @buf_len: buffer length -+ * @gc_seq: GC sequence number to detect races with GC -+ * @cnt: number of data nodes for bulk read -+ * @blk_cnt: number of data blocks including holes -+ * @eof: end of file reached -+ */ -+struct bu_info { -+ union ubifs_key key; -+ struct ubifs_zbranch zbranch[UBIFS_MAX_BULK_READ]; -+ void *buf; -+ int buf_len; -+ int gc_seq; -+ int cnt; -+ int blk_cnt; -+ int eof; -+}; -+ -+/** -+ * struct ubifs_node_range - node length range description data structure. -+ * @len: fixed node length -+ * @min_len: minimum possible node length -+ * @max_len: maximum possible node length -+ * -+ * If @max_len is %0, the node has fixed length @len. -+ */ -+struct ubifs_node_range { -+ union { -+ int len; -+ int min_len; -+ }; -+ int max_len; -+}; -+ -+/** -+ * struct ubifs_compressor - UBIFS compressor description structure. -+ * @compr_type: compressor type (%UBIFS_COMPR_LZO, etc) -+ * @cc: cryptoapi compressor handle -+ * @comp_mutex: mutex used during compression -+ * @decomp_mutex: mutex used during decompression -+ * @name: compressor name -+ * @capi_name: cryptoapi compressor name -+ */ -+struct ubifs_compressor { -+ int compr_type; -+ struct crypto_comp *cc; -+ struct mutex *comp_mutex; -+ struct mutex *decomp_mutex; -+ const char *name; -+ const char *capi_name; -+}; -+ -+/** -+ * struct ubifs_budget_req - budget requirements of an operation. 
-+ * -+ * @fast: non-zero if the budgeting should try to acquire budget quickly and -+ * should not try to call write-back -+ * @recalculate: non-zero if @idx_growth, @data_growth, and @dd_growth fields -+ * have to be re-calculated -+ * @new_page: non-zero if the operation adds a new page -+ * @dirtied_page: non-zero if the operation makes a page dirty -+ * @new_dent: non-zero if the operation adds a new directory entry -+ * @mod_dent: non-zero if the operation removes or modifies an existing -+ * directory entry -+ * @new_ino: non-zero if the operation adds a new inode -+ * @new_ino_d: how much data newly created inode contains -+ * @dirtied_ino: how many inodes the operation makes dirty -+ * @dirtied_ino_d: how much data dirtied inode contains -+ * @idx_growth: how much the index will supposedly grow -+ * @data_growth: how much new data the operation will supposedly add -+ * @dd_growth: how much data that makes other data dirty the operation will -+ * supposedly add -+ * -+ * @idx_growth, @data_growth and @dd_growth are not used in budget request. The -+ * budgeting subsystem caches index and data growth values there to avoid -+ * re-calculating them when the budget is released. However, if @idx_growth is -+ * %-1, it is calculated by the release function using other fields. -+ * -+ * An inode may contain 4KiB of data at max., thus the widths of @new_ino_d -+ * is 13 bits, and @dirtied_ino_d - 15, because up to 4 inodes may be made -+ * dirty by the re-name operation. -+ * -+ * Note, UBIFS aligns node lengths to 8-bytes boundary, so the requester has to -+ * make sure the amount of inode data which contribute to @new_ino_d and -+ * @dirtied_ino_d fields are aligned. 
-+ */ -+struct ubifs_budget_req { -+ unsigned int fast:1; -+ unsigned int recalculate:1; -+#ifndef UBIFS_DEBUG -+ unsigned int new_page:1; -+ unsigned int dirtied_page:1; -+ unsigned int new_dent:1; -+ unsigned int mod_dent:1; -+ unsigned int new_ino:1; -+ unsigned int new_ino_d:13; -+ unsigned int dirtied_ino:4; -+ unsigned int dirtied_ino_d:15; -+#else -+ /* Not bit-fields to check for overflows */ -+ unsigned int new_page; -+ unsigned int dirtied_page; -+ unsigned int new_dent; -+ unsigned int mod_dent; -+ unsigned int new_ino; -+ unsigned int new_ino_d; -+ unsigned int dirtied_ino; -+ unsigned int dirtied_ino_d; -+#endif -+ int idx_growth; -+ int data_growth; -+ int dd_growth; -+}; -+ -+/** -+ * struct ubifs_orphan - stores the inode number of an orphan. -+ * @rb: rb-tree node of rb-tree of orphans sorted by inode number -+ * @list: list head of list of orphans in order added -+ * @new_list: list head of list of orphans added since the last commit -+ * @cnext: next orphan to commit -+ * @dnext: next orphan to delete -+ * @inum: inode number -+ * @new: %1 => added since the last commit, otherwise %0 -+ */ -+struct ubifs_orphan { -+ struct rb_node rb; -+ struct list_head list; -+ struct list_head new_list; -+ struct ubifs_orphan *cnext; -+ struct ubifs_orphan *dnext; -+ ino_t inum; -+ int new; -+}; -+ -+/** -+ * struct ubifs_mount_opts - UBIFS-specific mount options information. 
-+ * @unmount_mode: selected unmount mode (%0 default, %1 normal, %2 fast) -+ * @bulk_read: enable/disable bulk-reads (%0 default, %1 disable, %2 enable) -+ * @chk_data_crc: enable/disable CRC data checking when reading data nodes -+ * (%0 default, %1 disable, %2 enable) -+ * @override_compr: override default compressor (%0 - do not override and use -+ * superblock compressor, %1 - override and use compressor -+ * specified in @compr_type) -+ * @compr_type: compressor type to override the superblock compressor with -+ * (%UBIFS_COMPR_NONE, etc) -+ */ -+struct ubifs_mount_opts { -+ unsigned int unmount_mode:2; -+ unsigned int bulk_read:2; -+ unsigned int chk_data_crc:2; -+ unsigned int override_compr:1; -+ unsigned int compr_type:2; -+}; -+ -+struct ubifs_debug_info; -+ -+/** -+ * struct ubifs_info - UBIFS file-system description data structure -+ * (per-superblock). -+ * @vfs_sb: VFS @struct super_block object -+ * @bdi: backing device info object to make VFS happy and disable read-ahead -+ * -+ * @highest_inum: highest used inode number -+ * @max_sqnum: current global sequence number -+ * @cmt_no: commit number of the last successfully completed commit, protected -+ * by @commit_sem -+ * @cnt_lock: protects @highest_inum and @max_sqnum counters -+ * @fmt_version: UBIFS on-flash format version -+ * @uuid: UUID from super block -+ * -+ * @lhead_lnum: log head logical eraseblock number -+ * @lhead_offs: log head offset -+ * @ltail_lnum: log tail logical eraseblock number (offset is always 0) -+ * @log_mutex: protects the log, @lhead_lnum, @lhead_offs, @ltail_lnum, and -+ * @bud_bytes -+ * @min_log_bytes: minimum required number of bytes in the log -+ * @cmt_bud_bytes: used during commit to temporarily amount of bytes in -+ * committed buds -+ * -+ * @buds: tree of all buds indexed by bud LEB number -+ * @bud_bytes: how many bytes of flash is used by buds -+ * @buds_lock: protects the @buds tree, @bud_bytes, and per-journal head bud -+ * lists -+ * @jhead_cnt: count of 
journal heads -+ * @jheads: journal heads (head zero is base head) -+ * @max_bud_bytes: maximum number of bytes allowed in buds -+ * @bg_bud_bytes: number of bud bytes when background commit is initiated -+ * @old_buds: buds to be released after commit ends -+ * @max_bud_cnt: maximum number of buds -+ * -+ * @commit_sem: synchronizes committer with other processes -+ * @cmt_state: commit state -+ * @cs_lock: commit state lock -+ * @cmt_wq: wait queue to sleep on if the log is full and a commit is running -+ * -+ * @big_lpt: flag that LPT is too big to write whole during commit -+ * @no_chk_data_crc: do not check CRCs when reading data nodes (except during -+ * recovery) -+ * @bulk_read: enable bulk-reads -+ * @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc) -+ * -+ * @tnc_mutex: protects the Tree Node Cache (TNC), @zroot, @cnext, @enext, and -+ * @calc_idx_sz -+ * @zroot: zbranch which points to the root index node and znode -+ * @cnext: next znode to commit -+ * @enext: next znode to commit to empty space -+ * @gap_lebs: array of LEBs used by the in-gaps commit method -+ * @cbuf: commit buffer -+ * @ileb_buf: buffer for commit in-the-gaps method -+ * @ileb_len: length of data in ileb_buf -+ * @ihead_lnum: LEB number of index head -+ * @ihead_offs: offset of index head -+ * @ilebs: pre-allocated index LEBs -+ * @ileb_cnt: number of pre-allocated index LEBs -+ * @ileb_nxt: next pre-allocated index LEBs -+ * @old_idx: tree of index nodes obsoleted since the last commit start -+ * @bottom_up_buf: a buffer which is used by 'dirty_cow_bottom_up()' in tnc.c -+ * -+ * @mst_node: master node -+ * @mst_offs: offset of valid master node -+ * @mst_mutex: protects the master node area, @mst_node, and @mst_offs -+ * -+ * @max_bu_buf_len: maximum bulk-read buffer length -+ * @bu_mutex: protects the pre-allocated bulk-read buffer and @c->bu -+ * @bu: pre-allocated bulk-read information -+ * -+ * @log_lebs: number of logical eraseblocks in the log -+ * 
@log_bytes: log size in bytes -+ * @log_last: last LEB of the log -+ * @lpt_lebs: number of LEBs used for lprops table -+ * @lpt_first: first LEB of the lprops table area -+ * @lpt_last: last LEB of the lprops table area -+ * @orph_lebs: number of LEBs used for the orphan area -+ * @orph_first: first LEB of the orphan area -+ * @orph_last: last LEB of the orphan area -+ * @main_lebs: count of LEBs in the main area -+ * @main_first: first LEB of the main area -+ * @main_bytes: main area size in bytes -+ * -+ * @key_hash_type: type of the key hash -+ * @key_hash: direntry key hash function -+ * @key_fmt: key format -+ * @key_len: key length -+ * @fanout: fanout of the index tree (number of links per indexing node) -+ * -+ * @min_io_size: minimal input/output unit size -+ * @min_io_shift: number of bits in @min_io_size minus one -+ * @leb_size: logical eraseblock size in bytes -+ * @half_leb_size: half LEB size -+ * @leb_cnt: count of logical eraseblocks -+ * @max_leb_cnt: maximum count of logical eraseblocks -+ * @old_leb_cnt: count of logical eraseblocks before re-size -+ * @ro_media: the underlying UBI volume is read-only -+ * -+ * @dirty_pg_cnt: number of dirty pages (not used) -+ * @dirty_zn_cnt: number of dirty znodes -+ * @clean_zn_cnt: number of clean znodes -+ * -+ * @budg_idx_growth: amount of bytes budgeted for index growth -+ * @budg_data_growth: amount of bytes budgeted for cached data -+ * @budg_dd_growth: amount of bytes budgeted for cached data that will make -+ * other data dirty -+ * @budg_uncommitted_idx: amount of bytes were budgeted for growth of the index, -+ * but which still have to be taken into account because -+ * the index has not been committed so far -+ * @space_lock: protects @budg_idx_growth, @budg_data_growth, @budg_dd_growth, -+ * @budg_uncommited_idx, @min_idx_lebs, @old_idx_sz, @lst, -+ * @nospace, and @nospace_rp; -+ * @min_idx_lebs: minimum number of LEBs required for the index -+ * @old_idx_sz: size of index on flash -+ * 
@calc_idx_sz: temporary variable which is used to calculate new index size -+ * (contains accurate new index size at end of TNC commit start) -+ * @lst: lprops statistics -+ * @nospace: non-zero if the file-system does not have flash space (used as -+ * optimization) -+ * @nospace_rp: the same as @nospace, but additionally means that even reserved -+ * pool is full -+ * -+ * @page_budget: budget for a page -+ * @inode_budget: budget for an inode -+ * @dent_budget: budget for a directory entry -+ * -+ * @ref_node_alsz: size of the LEB reference node aligned to the min. flash -+ * I/O unit -+ * @mst_node_alsz: master node aligned size -+ * @min_idx_node_sz: minimum indexing node aligned on 8-bytes boundary -+ * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary -+ * @max_inode_sz: maximum possible inode size in bytes -+ * @max_znode_sz: size of znode in bytes -+ * -+ * @leb_overhead: how many bytes are wasted in an LEB when it is filled with -+ * data nodes of maximum size - used in free space reporting -+ * @dead_wm: LEB dead space watermark -+ * @dark_wm: LEB dark space watermark -+ * @block_cnt: count of 4KiB blocks on the FS -+ * -+ * @ranges: UBIFS node length ranges -+ * @ubi: UBI volume descriptor -+ * @di: UBI device information -+ * @vi: UBI volume information -+ * -+ * @orph_tree: rb-tree of orphan inode numbers -+ * @orph_list: list of orphan inode numbers in order added -+ * @orph_new: list of orphan inode numbers added since last commit -+ * @orph_cnext: next orphan to commit -+ * @orph_dnext: next orphan to delete -+ * @orphan_lock: lock for orph_tree and orph_new -+ * @orph_buf: buffer for orphan nodes -+ * @new_orphans: number of orphans since last commit -+ * @cmt_orphans: number of orphans being committed -+ * @tot_orphans: number of orphans in the rb_tree -+ * @max_orphans: maximum number of orphans allowed -+ * @ohead_lnum: orphan head LEB number -+ * @ohead_offs: orphan head offset -+ * @no_orphs: non-zero if there are no orphans 
-+ * -+ * @bgt: UBIFS background thread -+ * @bgt_name: background thread name -+ * @need_bgt: if background thread should run -+ * @need_wbuf_sync: if write-buffers have to be synchronized -+ * -+ * @gc_lnum: LEB number used for garbage collection -+ * @sbuf: a buffer of LEB size used by GC and replay for scanning -+ * @idx_gc: list of index LEBs that have been garbage collected -+ * @idx_gc_cnt: number of elements on the idx_gc list -+ * @gc_seq: incremented for every non-index LEB garbage collected -+ * @gced_lnum: last non-index LEB that was garbage collected -+ * -+ * @infos_list: links all 'ubifs_info' objects -+ * @umount_mutex: serializes shrinker and un-mount -+ * @shrinker_run_no: shrinker run number -+ * -+ * @space_bits: number of bits needed to record free or dirty space -+ * @lpt_lnum_bits: number of bits needed to record a LEB number in the LPT -+ * @lpt_offs_bits: number of bits needed to record an offset in the LPT -+ * @lpt_spc_bits: number of bits needed to space in the LPT -+ * @pcnt_bits: number of bits needed to record pnode or nnode number -+ * @lnum_bits: number of bits needed to record LEB number -+ * @nnode_sz: size of on-flash nnode -+ * @pnode_sz: size of on-flash pnode -+ * @ltab_sz: size of on-flash LPT lprops table -+ * @lsave_sz: size of on-flash LPT save table -+ * @pnode_cnt: number of pnodes -+ * @nnode_cnt: number of nnodes -+ * @lpt_hght: height of the LPT -+ * @pnodes_have: number of pnodes in memory -+ * -+ * @lp_mutex: protects lprops table and all the other lprops-related fields -+ * @lpt_lnum: LEB number of the root nnode of the LPT -+ * @lpt_offs: offset of the root nnode of the LPT -+ * @nhead_lnum: LEB number of LPT head -+ * @nhead_offs: offset of LPT head -+ * @lpt_drty_flgs: dirty flags for LPT special nodes e.g. 
ltab -+ * @dirty_nn_cnt: number of dirty nnodes -+ * @dirty_pn_cnt: number of dirty pnodes -+ * @check_lpt_free: flag that indicates LPT GC may be needed -+ * @lpt_sz: LPT size -+ * @lpt_nod_buf: buffer for an on-flash nnode or pnode -+ * @lpt_buf: buffer of LEB size used by LPT -+ * @nroot: address in memory of the root nnode of the LPT -+ * @lpt_cnext: next LPT node to commit -+ * @lpt_heap: array of heaps of categorized lprops -+ * @dirty_idx: a (reverse sorted) copy of the LPROPS_DIRTY_IDX heap as at -+ * previous commit start -+ * @uncat_list: list of un-categorized LEBs -+ * @empty_list: list of empty LEBs -+ * @freeable_list: list of freeable non-index LEBs (free + dirty == leb_size) -+ * @frdi_idx_list: list of freeable index LEBs (free + dirty == leb_size) -+ * @freeable_cnt: number of freeable LEBs in @freeable_list -+ * -+ * @ltab_lnum: LEB number of LPT's own lprops table -+ * @ltab_offs: offset of LPT's own lprops table -+ * @ltab: LPT's own lprops table -+ * @ltab_cmt: LPT's own lprops table (commit copy) -+ * @lsave_cnt: number of LEB numbers in LPT's save table -+ * @lsave_lnum: LEB number of LPT's save table -+ * @lsave_offs: offset of LPT's save table -+ * @lsave: LPT's save table -+ * @lscan_lnum: LEB number of last LPT scan -+ * -+ * @rp_size: size of the reserved pool in bytes -+ * @report_rp_size: size of the reserved pool reported to user-space -+ * @rp_uid: reserved pool user ID -+ * @rp_gid: reserved pool group ID -+ * -+ * @empty: if the UBI device is empty -+ * @replay_tree: temporary tree used during journal replay -+ * @replay_list: temporary list used during journal replay -+ * @replay_buds: list of buds to replay -+ * @cs_sqnum: sequence number of first node in the log (commit start node) -+ * @replay_sqnum: sequence number of node currently being replayed -+ * @need_recovery: file-system needs recovery -+ * @replaying: set to %1 during journal replay -+ * @unclean_leb_list: LEBs to recover when mounting ro to rw -+ * @rcvrd_mst_node: 
recovered master node to write when mounting ro to rw -+ * @size_tree: inode size information for recovery -+ * @remounting_rw: set while remounting from ro to rw (sb flags have MS_RDONLY) -+ * @always_chk_crc: always check CRCs (while mounting and remounting rw) -+ * @mount_opts: UBIFS-specific mount options -+ * -+ * @dbg: debugging-related information -+ */ -+struct ubifs_info { -+ struct super_block *vfs_sb; -+ struct backing_dev_info bdi; -+ -+ ino_t highest_inum; -+ unsigned long long max_sqnum; -+ unsigned long long cmt_no; -+ spinlock_t cnt_lock; -+ int fmt_version; -+ unsigned char uuid[16]; -+ -+ int lhead_lnum; -+ int lhead_offs; -+ int ltail_lnum; -+ struct mutex log_mutex; -+ int min_log_bytes; -+ long long cmt_bud_bytes; -+ -+ struct rb_root buds; -+ long long bud_bytes; -+ spinlock_t buds_lock; -+ int jhead_cnt; -+ struct ubifs_jhead *jheads; -+ long long max_bud_bytes; -+ long long bg_bud_bytes; -+ struct list_head old_buds; -+ int max_bud_cnt; -+ -+ struct rw_semaphore commit_sem; -+ int cmt_state; -+ spinlock_t cs_lock; -+ wait_queue_head_t cmt_wq; -+ -+ unsigned int big_lpt:1; -+ unsigned int no_chk_data_crc:1; -+ unsigned int bulk_read:1; -+ unsigned int default_compr:2; -+ -+ struct mutex tnc_mutex; -+ struct ubifs_zbranch zroot; -+ struct ubifs_znode *cnext; -+ struct ubifs_znode *enext; -+ int *gap_lebs; -+ void *cbuf; -+ void *ileb_buf; -+ int ileb_len; -+ int ihead_lnum; -+ int ihead_offs; -+ int *ilebs; -+ int ileb_cnt; -+ int ileb_nxt; -+ struct rb_root old_idx; -+ int *bottom_up_buf; -+ -+ struct ubifs_mst_node *mst_node; -+ int mst_offs; -+ struct mutex mst_mutex; -+ -+ int max_bu_buf_len; -+ struct mutex bu_mutex; -+ struct bu_info bu; -+ -+ int log_lebs; -+ long long log_bytes; -+ int log_last; -+ int lpt_lebs; -+ int lpt_first; -+ int lpt_last; -+ int orph_lebs; -+ int orph_first; -+ int orph_last; -+ int main_lebs; -+ int main_first; -+ long long main_bytes; -+ -+ uint8_t key_hash_type; -+ uint32_t (*key_hash)(const char *str, int 
len); -+ int key_fmt; -+ int key_len; -+ int fanout; -+ -+ int min_io_size; -+ int min_io_shift; -+ int leb_size; -+ int half_leb_size; -+ int leb_cnt; -+ int max_leb_cnt; -+ int old_leb_cnt; -+ int ro_media; -+ -+ atomic_long_t dirty_pg_cnt; -+ atomic_long_t dirty_zn_cnt; -+ atomic_long_t clean_zn_cnt; -+ -+ long long budg_idx_growth; -+ long long budg_data_growth; -+ long long budg_dd_growth; -+ long long budg_uncommitted_idx; -+ spinlock_t space_lock; -+ int min_idx_lebs; -+ unsigned long long old_idx_sz; -+ unsigned long long calc_idx_sz; -+ struct ubifs_lp_stats lst; -+ unsigned int nospace:1; -+ unsigned int nospace_rp:1; -+ -+ int page_budget; -+ int inode_budget; -+ int dent_budget; -+ -+ int ref_node_alsz; -+ int mst_node_alsz; -+ int min_idx_node_sz; -+ int max_idx_node_sz; -+ long long max_inode_sz; -+ int max_znode_sz; -+ -+ int leb_overhead; -+ int dead_wm; -+ int dark_wm; -+ int block_cnt; -+ -+ struct ubifs_node_range ranges[UBIFS_NODE_TYPES_CNT]; -+ struct ubi_volume_desc *ubi; -+ struct ubi_device_info di; -+ struct ubi_volume_info vi; -+ -+ struct rb_root orph_tree; -+ struct list_head orph_list; -+ struct list_head orph_new; -+ struct ubifs_orphan *orph_cnext; -+ struct ubifs_orphan *orph_dnext; -+ spinlock_t orphan_lock; -+ void *orph_buf; -+ int new_orphans; -+ int cmt_orphans; -+ int tot_orphans; -+ int max_orphans; -+ int ohead_lnum; -+ int ohead_offs; -+ int no_orphs; -+ -+ struct task_struct *bgt; -+ char bgt_name[sizeof(BGT_NAME_PATTERN) + 9]; -+ int need_bgt; -+ int need_wbuf_sync; -+ -+ int gc_lnum; -+ void *sbuf; -+ struct list_head idx_gc; -+ int idx_gc_cnt; -+ int gc_seq; -+ int gced_lnum; -+ -+ struct list_head infos_list; -+ struct mutex umount_mutex; -+ unsigned int shrinker_run_no; -+ -+ int space_bits; -+ int lpt_lnum_bits; -+ int lpt_offs_bits; -+ int lpt_spc_bits; -+ int pcnt_bits; -+ int lnum_bits; -+ int nnode_sz; -+ int pnode_sz; -+ int ltab_sz; -+ int lsave_sz; -+ int pnode_cnt; -+ int nnode_cnt; -+ int lpt_hght; -+ int 
pnodes_have; -+ -+ struct mutex lp_mutex; -+ int lpt_lnum; -+ int lpt_offs; -+ int nhead_lnum; -+ int nhead_offs; -+ int lpt_drty_flgs; -+ int dirty_nn_cnt; -+ int dirty_pn_cnt; -+ int check_lpt_free; -+ long long lpt_sz; -+ void *lpt_nod_buf; -+ void *lpt_buf; -+ struct ubifs_nnode *nroot; -+ struct ubifs_cnode *lpt_cnext; -+ struct ubifs_lpt_heap lpt_heap[LPROPS_HEAP_CNT]; -+ struct ubifs_lpt_heap dirty_idx; -+ struct list_head uncat_list; -+ struct list_head empty_list; -+ struct list_head freeable_list; -+ struct list_head frdi_idx_list; -+ int freeable_cnt; -+ -+ int ltab_lnum; -+ int ltab_offs; -+ struct ubifs_lpt_lprops *ltab; -+ struct ubifs_lpt_lprops *ltab_cmt; -+ int lsave_cnt; -+ int lsave_lnum; -+ int lsave_offs; -+ int *lsave; -+ int lscan_lnum; -+ -+ long long rp_size; -+ long long report_rp_size; -+ uid_t rp_uid; -+ gid_t rp_gid; -+ -+ /* The below fields are used only during mounting and re-mounting */ -+ int empty; -+ struct rb_root replay_tree; -+ struct list_head replay_list; -+ struct list_head replay_buds; -+ unsigned long long cs_sqnum; -+ unsigned long long replay_sqnum; -+ int need_recovery; -+ int replaying; -+ struct list_head unclean_leb_list; -+ struct ubifs_mst_node *rcvrd_mst_node; -+ struct rb_root size_tree; -+ int remounting_rw; -+ int always_chk_crc; -+ struct ubifs_mount_opts mount_opts; -+ -+#ifdef CONFIG_UBIFS_FS_DEBUG -+ struct ubifs_debug_info *dbg; -+#endif -+}; -+ -+extern struct list_head ubifs_infos; -+extern spinlock_t ubifs_infos_lock; -+extern atomic_long_t ubifs_clean_zn_cnt; -+extern struct kmem_cache *ubifs_inode_slab; -+extern const struct super_operations ubifs_super_operations; -+extern const struct address_space_operations ubifs_file_address_operations; -+extern const struct file_operations ubifs_file_operations; -+extern const struct inode_operations ubifs_file_inode_operations; -+extern const struct file_operations ubifs_dir_operations; -+extern const struct inode_operations ubifs_dir_inode_operations; 
-+extern const struct inode_operations ubifs_symlink_inode_operations; -+extern struct backing_dev_info ubifs_backing_dev_info; -+extern struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT]; -+ -+/* io.c */ -+void ubifs_ro_mode(struct ubifs_info *c, int err); -+int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len); -+int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, -+ int dtype); -+int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf); -+int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len, -+ int lnum, int offs); -+int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len, -+ int lnum, int offs); -+int ubifs_write_node(struct ubifs_info *c, void *node, int len, int lnum, -+ int offs, int dtype); -+int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, -+ int offs, int quiet, int must_chk_crc); -+void ubifs_prepare_node(struct ubifs_info *c, void *buf, int len, int pad); -+void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last); -+int ubifs_io_init(struct ubifs_info *c); -+void ubifs_pad(const struct ubifs_info *c, void *buf, int pad); -+int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf); -+int ubifs_bg_wbufs_sync(struct ubifs_info *c); -+void ubifs_wbuf_add_ino_nolock(struct ubifs_wbuf *wbuf, ino_t inum); -+int ubifs_sync_wbufs_by_inode(struct ubifs_info *c, struct inode *inode); -+ -+/* scan.c */ -+struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, -+ int offs, void *sbuf); -+void ubifs_scan_destroy(struct ubifs_scan_leb *sleb); -+int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, -+ int offs, int quiet); -+struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum, -+ int offs, void *sbuf); -+void ubifs_end_scan(const struct ubifs_info *c, struct ubifs_scan_leb *sleb, -+ int lnum, int offs); -+int ubifs_add_snod(const struct ubifs_info *c, 
struct ubifs_scan_leb *sleb, -+ void *buf, int offs); -+void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs, -+ void *buf); -+ -+/* log.c */ -+void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud); -+void ubifs_create_buds_lists(struct ubifs_info *c); -+int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs); -+struct ubifs_bud *ubifs_search_bud(struct ubifs_info *c, int lnum); -+struct ubifs_wbuf *ubifs_get_wbuf(struct ubifs_info *c, int lnum); -+int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum); -+int ubifs_log_end_commit(struct ubifs_info *c, int new_ltail_lnum); -+int ubifs_log_post_commit(struct ubifs_info *c, int old_ltail_lnum); -+int ubifs_consolidate_log(struct ubifs_info *c); -+ -+/* journal.c */ -+int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, -+ const struct qstr *nm, const struct inode *inode, -+ int deletion, int xent); -+int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, -+ const union ubifs_key *key, const void *buf, int len); -+int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode); -+int ubifs_jnl_delete_inode(struct ubifs_info *c, const struct inode *inode); -+int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, -+ const struct dentry *old_dentry, -+ const struct inode *new_dir, -+ const struct dentry *new_dentry, int sync); -+int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode, -+ loff_t old_size, loff_t new_size); -+int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host, -+ const struct inode *inode, const struct qstr *nm); -+int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode1, -+ const struct inode *inode2); -+ -+/* budget.c */ -+int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req); -+void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req); -+void ubifs_release_dirty_inode_budget(struct 
ubifs_info *c, -+ struct ubifs_inode *ui); -+int ubifs_budget_inode_op(struct ubifs_info *c, struct inode *inode, -+ struct ubifs_budget_req *req); -+void ubifs_release_ino_dirty(struct ubifs_info *c, struct inode *inode, -+ struct ubifs_budget_req *req); -+void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode, -+ struct ubifs_budget_req *req); -+long long ubifs_get_free_space(struct ubifs_info *c); -+long long ubifs_get_free_space_nolock(struct ubifs_info *c); -+int ubifs_calc_min_idx_lebs(struct ubifs_info *c); -+void ubifs_convert_page_budget(struct ubifs_info *c); -+long long ubifs_reported_space(const struct ubifs_info *c, long long free); -+long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs); -+ -+/* find.c */ -+int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free, -+ int squeeze); -+int ubifs_find_free_leb_for_idx(struct ubifs_info *c); -+int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, -+ int min_space, int pick_free); -+int ubifs_find_dirty_idx_leb(struct ubifs_info *c); -+int ubifs_save_dirty_idx_lnums(struct ubifs_info *c); -+ -+/* tnc.c */ -+int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, -+ struct ubifs_znode **zn, int *n); -+int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, -+ void *node, const struct qstr *nm); -+int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, -+ void *node, int *lnum, int *offs); -+int ubifs_tnc_add(struct ubifs_info *c, const union ubifs_key *key, int lnum, -+ int offs, int len); -+int ubifs_tnc_replace(struct ubifs_info *c, const union ubifs_key *key, -+ int old_lnum, int old_offs, int lnum, int offs, int len); -+int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key, -+ int lnum, int offs, int len, const struct qstr *nm); -+int ubifs_tnc_remove(struct ubifs_info *c, const union ubifs_key *key); -+int ubifs_tnc_remove_nm(struct ubifs_info *c, const union 
ubifs_key *key, -+ const struct qstr *nm); -+int ubifs_tnc_remove_range(struct ubifs_info *c, union ubifs_key *from_key, -+ union ubifs_key *to_key); -+int ubifs_tnc_remove_ino(struct ubifs_info *c, ino_t inum); -+struct ubifs_dent_node *ubifs_tnc_next_ent(struct ubifs_info *c, -+ union ubifs_key *key, -+ const struct qstr *nm); -+void ubifs_tnc_close(struct ubifs_info *c); -+int ubifs_tnc_has_node(struct ubifs_info *c, union ubifs_key *key, int level, -+ int lnum, int offs, int is_idx); -+int ubifs_dirty_idx_node(struct ubifs_info *c, union ubifs_key *key, int level, -+ int lnum, int offs); -+/* Shared by tnc.c for tnc_commit.c */ -+void destroy_old_idx(struct ubifs_info *c); -+int is_idx_node_in_tnc(struct ubifs_info *c, union ubifs_key *key, int level, -+ int lnum, int offs); -+int insert_old_idx_znode(struct ubifs_info *c, struct ubifs_znode *znode); -+int ubifs_tnc_get_bu_keys(struct ubifs_info *c, struct bu_info *bu); -+int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu); -+ -+/* tnc_misc.c */ -+struct ubifs_znode *ubifs_tnc_levelorder_next(struct ubifs_znode *zr, -+ struct ubifs_znode *znode); -+int ubifs_search_zbranch(const struct ubifs_info *c, -+ const struct ubifs_znode *znode, -+ const union ubifs_key *key, int *n); -+struct ubifs_znode *ubifs_tnc_postorder_first(struct ubifs_znode *znode); -+struct ubifs_znode *ubifs_tnc_postorder_next(struct ubifs_znode *znode); -+long ubifs_destroy_tnc_subtree(struct ubifs_znode *zr); -+struct ubifs_znode *ubifs_load_znode(struct ubifs_info *c, -+ struct ubifs_zbranch *zbr, -+ struct ubifs_znode *parent, int iip); -+int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr, -+ void *node); -+ -+/* tnc_commit.c */ -+int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot); -+int ubifs_tnc_end_commit(struct ubifs_info *c); -+ -+/* shrinker.c */ -+int ubifs_shrinker(int nr_to_scan, gfp_t gfp_mask); -+ -+/* commit.c */ -+int ubifs_bg_thread(void *info); -+void 
ubifs_commit_required(struct ubifs_info *c); -+void ubifs_request_bg_commit(struct ubifs_info *c); -+int ubifs_run_commit(struct ubifs_info *c); -+void ubifs_recovery_commit(struct ubifs_info *c); -+int ubifs_gc_should_commit(struct ubifs_info *c); -+void ubifs_wait_for_commit(struct ubifs_info *c); -+ -+/* master.c */ -+int ubifs_read_master(struct ubifs_info *c); -+int ubifs_write_master(struct ubifs_info *c); -+ -+/* sb.c */ -+int ubifs_read_superblock(struct ubifs_info *c); -+struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c); -+int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup); -+ -+/* replay.c */ -+int ubifs_validate_entry(struct ubifs_info *c, -+ const struct ubifs_dent_node *dent); -+int ubifs_replay_journal(struct ubifs_info *c); -+ -+/* gc.c */ -+int ubifs_garbage_collect(struct ubifs_info *c, int anyway); -+int ubifs_gc_start_commit(struct ubifs_info *c); -+int ubifs_gc_end_commit(struct ubifs_info *c); -+void ubifs_destroy_idx_gc(struct ubifs_info *c); -+int ubifs_get_idx_gc_leb(struct ubifs_info *c); -+int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp); -+ -+/* orphan.c */ -+int ubifs_add_orphan(struct ubifs_info *c, ino_t inum); -+void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum); -+int ubifs_orphan_start_commit(struct ubifs_info *c); -+int ubifs_orphan_end_commit(struct ubifs_info *c); -+int ubifs_mount_orphans(struct ubifs_info *c, int unclean, int read_only); -+int ubifs_clear_orphans(struct ubifs_info *c); -+ -+/* lpt.c */ -+int ubifs_calc_lpt_geom(struct ubifs_info *c); -+int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, -+ int *lpt_lebs, int *big_lpt); -+int ubifs_lpt_init(struct ubifs_info *c, int rd, int wr); -+struct ubifs_lprops *ubifs_lpt_lookup(struct ubifs_info *c, int lnum); -+struct ubifs_lprops *ubifs_lpt_lookup_dirty(struct ubifs_info *c, int lnum); -+int ubifs_lpt_scan_nolock(struct ubifs_info *c, int start_lnum, int end_lnum, -+ 
ubifs_lpt_scan_callback scan_cb, void *data); -+ -+/* Shared by lpt.c for lpt_commit.c */ -+void ubifs_pack_lsave(struct ubifs_info *c, void *buf, int *lsave); -+void ubifs_pack_ltab(struct ubifs_info *c, void *buf, -+ struct ubifs_lpt_lprops *ltab); -+void ubifs_pack_pnode(struct ubifs_info *c, void *buf, -+ struct ubifs_pnode *pnode); -+void ubifs_pack_nnode(struct ubifs_info *c, void *buf, -+ struct ubifs_nnode *nnode); -+struct ubifs_pnode *ubifs_get_pnode(struct ubifs_info *c, -+ struct ubifs_nnode *parent, int iip); -+struct ubifs_nnode *ubifs_get_nnode(struct ubifs_info *c, -+ struct ubifs_nnode *parent, int iip); -+int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip); -+void ubifs_add_lpt_dirt(struct ubifs_info *c, int lnum, int dirty); -+void ubifs_add_nnode_dirt(struct ubifs_info *c, struct ubifs_nnode *nnode); -+uint32_t ubifs_unpack_bits(uint8_t **addr, int *pos, int nrbits); -+struct ubifs_nnode *ubifs_first_nnode(struct ubifs_info *c, int *hght); -+/* Needed only in debugging code in lpt_commit.c */ -+int ubifs_unpack_nnode(const struct ubifs_info *c, void *buf, -+ struct ubifs_nnode *nnode); -+ -+/* lpt_commit.c */ -+int ubifs_lpt_start_commit(struct ubifs_info *c); -+int ubifs_lpt_end_commit(struct ubifs_info *c); -+int ubifs_lpt_post_commit(struct ubifs_info *c); -+void ubifs_lpt_free(struct ubifs_info *c, int wr_only); -+ -+/* lprops.c */ -+const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c, -+ const struct ubifs_lprops *lp, -+ int free, int dirty, int flags, -+ int idx_gc_cnt); -+void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *lst); -+void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops, -+ int cat); -+void ubifs_replace_cat(struct ubifs_info *c, struct ubifs_lprops *old_lprops, -+ struct ubifs_lprops *new_lprops); -+void ubifs_ensure_cat(struct ubifs_info *c, struct ubifs_lprops *lprops); -+int ubifs_categorize_lprops(const struct ubifs_info *c, -+ const struct 
ubifs_lprops *lprops); -+int ubifs_change_one_lp(struct ubifs_info *c, int lnum, int free, int dirty, -+ int flags_set, int flags_clean, int idx_gc_cnt); -+int ubifs_update_one_lp(struct ubifs_info *c, int lnum, int free, int dirty, -+ int flags_set, int flags_clean); -+int ubifs_read_one_lp(struct ubifs_info *c, int lnum, struct ubifs_lprops *lp); -+const struct ubifs_lprops *ubifs_fast_find_free(struct ubifs_info *c); -+const struct ubifs_lprops *ubifs_fast_find_empty(struct ubifs_info *c); -+const struct ubifs_lprops *ubifs_fast_find_freeable(struct ubifs_info *c); -+const struct ubifs_lprops *ubifs_fast_find_frdi_idx(struct ubifs_info *c); -+ -+/* file.c */ -+int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync); -+int ubifs_setattr(struct dentry *dentry, struct iattr *attr); -+ -+/* dir.c */ -+struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir, -+ int mode); -+int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry, -+ struct kstat *stat); -+ -+/* xattr.c */ -+int ubifs_setxattr(struct dentry *dentry, const char *name, -+ const void *value, size_t size, int flags); -+ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf, -+ size_t size); -+ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size); -+int ubifs_removexattr(struct dentry *dentry, const char *name); -+ -+/* super.c */ -+struct inode *ubifs_iget(struct super_block *sb, unsigned long inum); -+ -+/* recovery.c */ -+int ubifs_recover_master_node(struct ubifs_info *c); -+int ubifs_write_rcvrd_mst_node(struct ubifs_info *c); -+struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, -+ int offs, void *sbuf, int grouped); -+struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, -+ int offs, void *sbuf); -+int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf); -+int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf); -+int ubifs_rcvry_gc_commit(struct ubifs_info 
*c); -+int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key, -+ int deletion, loff_t new_size); -+int ubifs_recover_size(struct ubifs_info *c); -+void ubifs_destroy_size_tree(struct ubifs_info *c); -+ -+/* ioctl.c */ -+long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); -+void ubifs_set_inode_flags(struct inode *inode); -+#ifdef CONFIG_COMPAT -+long ubifs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg); -+#endif -+ -+/* compressor.c */ -+int __init ubifs_compressors_init(void); -+void ubifs_compressors_exit(void); -+void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len, -+ int *compr_type); -+int ubifs_decompress(const void *buf, int len, void *out, int *out_len, -+ int compr_type); -+ -+#include "debug.h" -+#include "misc.h" -+#include "key.h" -+ -+#endif /* !__UBIFS_H__ */ -diff -Nurd linux-2.6.24.orig/fs/ubifs/xattr.c linux-2.6.24/fs/ubifs/xattr.c ---- linux-2.6.24.orig/fs/ubifs/xattr.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.24/fs/ubifs/xattr.c 2009-04-17 09:49:28.000000000 +0200 -@@ -0,0 +1,571 @@ -+/* -+ * This file is part of UBIFS. -+ * -+ * Copyright (C) 2006-2008 Nokia Corporation. -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 as published by -+ * the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -+ * more details. 
-+ * -+ * You should have received a copy of the GNU General Public License along with -+ * this program; if not, write to the Free Software Foundation, Inc., 51 -+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ * -+ * Authors: Artem Bityutskiy (Битюцкий Артём) -+ * Adrian Hunter -+ */ -+ -+/* -+ * This file implements UBIFS extended attributes support. -+ * -+ * Extended attributes are implemented as regular inodes with attached data, -+ * which limits extended attribute size to UBIFS block size (4KiB). Names of -+ * extended attributes are described by extended attribute entries (xentries), -+ * which are almost identical to directory entries, but have different key type. -+ * -+ * In other words, the situation with extended attributes is very similar to -+ * directories. Indeed, any inode (but of course not xattr inodes) may have a -+ * number of associated xentries, just like directory inodes have associated -+ * directory entries. Extended attribute entries store the name of the extended -+ * attribute, the host inode number, and the extended attribute inode number. -+ * Similarly, direntries store the name, the parent and the target inode -+ * numbers. Thus, most of the common UBIFS mechanisms may be re-used for -+ * extended attributes. -+ * -+ * The number of extended attributes is not limited, but there is Linux -+ * limitation on the maximum possible size of the list of all extended -+ * attributes associated with an inode (%XATTR_LIST_MAX), so UBIFS makes sure -+ * the sum of all extended attribute names of the inode does not exceed that -+ * limit. -+ * -+ * Extended attributes are synchronous, which means they are written to the -+ * flash media synchronously and there is no write-back for extended attribute -+ * inodes. The extended attribute values are not stored in compressed form on -+ * the media. -+ * -+ * Since extended attributes are represented by regular inodes, they are cached -+ * in the VFS inode cache. 
The xentries are cached in the LNC cache (see -+ * tnc.c). -+ * -+ * ACL support is not implemented. -+ */ -+ -+#include <linux/xattr.h> -+#include <linux/posix_acl_xattr.h> -+#include "ubifs.h" -+ -+/* -+ * Limit the number of extended attributes per inode so that the total size -+ * (@xattr_size) is guaranteeded to fit in an 'unsigned int'. -+ */ -+#define MAX_XATTRS_PER_INODE 65535 -+ -+/* -+ * Extended attribute type constants. -+ * -+ * USER_XATTR: user extended attribute ("user.*") -+ * TRUSTED_XATTR: trusted extended attribute ("trusted.*) -+ * SECURITY_XATTR: security extended attribute ("security.*") -+ */ -+enum { -+ USER_XATTR, -+ TRUSTED_XATTR, -+ SECURITY_XATTR, -+}; -+ -+static struct inode_operations none_inode_operations; -+static struct address_space_operations none_address_operations; -+static struct file_operations none_file_operations; -+ -+/** -+ * create_xattr - create an extended attribute. -+ * @c: UBIFS file-system description object -+ * @host: host inode -+ * @nm: extended attribute name -+ * @value: extended attribute value -+ * @size: size of extended attribute value -+ * -+ * This is a helper function which creates an extended attribute of name @nm -+ * and value @value for inode @host. The host inode is also updated on flash -+ * because the ctime and extended attribute accounting data changes. This -+ * function returns zero in case of success and a negative error code in case -+ * of failure. 
-+ */ -+static int create_xattr(struct ubifs_info *c, struct inode *host, -+ const struct qstr *nm, const void *value, int size) -+{ -+ int err; -+ struct inode *inode; -+ struct ubifs_inode *ui, *host_ui = ubifs_inode(host); -+ struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, -+ .new_ino_d = ALIGN(size, 8), .dirtied_ino = 1, -+ .dirtied_ino_d = ALIGN(host_ui->data_len, 8) }; -+ -+ if (host_ui->xattr_cnt >= MAX_XATTRS_PER_INODE) -+ return -ENOSPC; -+ /* -+ * Linux limits the maximum size of the extended attribute names list -+ * to %XATTR_LIST_MAX. This means we should not allow creating more -+ * extended attributes if the name list becomes larger. This limitation -+ * is artificial for UBIFS, though. -+ */ -+ if (host_ui->xattr_names + host_ui->xattr_cnt + -+ nm->len + 1 > XATTR_LIST_MAX) -+ return -ENOSPC; -+ -+ err = ubifs_budget_space(c, &req); -+ if (err) -+ return err; -+ -+ inode = ubifs_new_inode(c, host, S_IFREG | S_IRWXUGO); -+ if (IS_ERR(inode)) { -+ err = PTR_ERR(inode); -+ goto out_budg; -+ } -+ -+ /* Re-define all operations to be "nothing" */ -+ inode->i_mapping->a_ops = &none_address_operations; -+ inode->i_op = &none_inode_operations; -+ inode->i_fop = &none_file_operations; -+ -+ inode->i_flags |= S_SYNC | S_NOATIME | S_NOCMTIME | S_NOQUOTA; -+ ui = ubifs_inode(inode); -+ ui->xattr = 1; -+ ui->flags |= UBIFS_XATTR_FL; -+ ui->data = kmalloc(size, GFP_NOFS); -+ if (!ui->data) { -+ err = -ENOMEM; -+ goto out_free; -+ } -+ memcpy(ui->data, value, size); -+ inode->i_size = ui->ui_size = size; -+ ui->data_len = size; -+ -+ mutex_lock(&host_ui->ui_mutex); -+ host->i_ctime = ubifs_current_time(host); -+ host_ui->xattr_cnt += 1; -+ host_ui->xattr_size += CALC_DENT_SIZE(nm->len); -+ host_ui->xattr_size += CALC_XATTR_BYTES(size); -+ host_ui->xattr_names += nm->len; -+ -+ err = ubifs_jnl_update(c, host, nm, inode, 0, 1); -+ if (err) -+ goto out_cancel; -+ mutex_unlock(&host_ui->ui_mutex); -+ -+ ubifs_release_budget(c, &req); -+ 
insert_inode_hash(inode); -+ iput(inode); -+ return 0; -+ -+out_cancel: -+ host_ui->xattr_cnt -= 1; -+ host_ui->xattr_size -= CALC_DENT_SIZE(nm->len); -+ host_ui->xattr_size -= CALC_XATTR_BYTES(size); -+ mutex_unlock(&host_ui->ui_mutex); -+out_free: -+ make_bad_inode(inode); -+ iput(inode); -+out_budg: -+ ubifs_release_budget(c, &req); -+ return err; -+} -+ -+/** -+ * change_xattr - change an extended attribute. -+ * @c: UBIFS file-system description object -+ * @host: host inode -+ * @inode: extended attribute inode -+ * @value: extended attribute value -+ * @size: size of extended attribute value -+ * -+ * This helper function changes the value of extended attribute @inode with new -+ * data from @value. Returns zero in case of success and a negative error code -+ * in case of failure. -+ */ -+static int change_xattr(struct ubifs_info *c, struct inode *host, -+ struct inode *inode, const void *value, int size) -+{ -+ int err; -+ struct ubifs_inode *host_ui = ubifs_inode(host); -+ struct ubifs_inode *ui = ubifs_inode(inode); -+ struct ubifs_budget_req req = { .dirtied_ino = 2, -+ .dirtied_ino_d = ALIGN(size, 8) + ALIGN(host_ui->data_len, 8) }; -+ -+ ubifs_assert(ui->data_len == inode->i_size); -+ err = ubifs_budget_space(c, &req); -+ if (err) -+ return err; -+ -+ kfree(ui->data); -+ ui->data = kmalloc(size, GFP_NOFS); -+ if (!ui->data) { -+ err = -ENOMEM; -+ goto out_free; -+ } -+ memcpy(ui->data, value, size); -+ inode->i_size = ui->ui_size = size; -+ ui->data_len = size; -+ -+ mutex_lock(&host_ui->ui_mutex); -+ host->i_ctime = ubifs_current_time(host); -+ host_ui->xattr_size -= CALC_XATTR_BYTES(ui->data_len); -+ host_ui->xattr_size += CALC_XATTR_BYTES(size); -+ -+ /* -+ * It is important to write the host inode after the xattr inode -+ * because if the host inode gets synchronized (via 'fsync()'), then -+ * the extended attribute inode gets synchronized, because it goes -+ * before the host inode in the write-buffer. 
-+ */ -+ err = ubifs_jnl_change_xattr(c, inode, host); -+ if (err) -+ goto out_cancel; -+ mutex_unlock(&host_ui->ui_mutex); -+ -+ ubifs_release_budget(c, &req); -+ return 0; -+ -+out_cancel: -+ host_ui->xattr_size -= CALC_XATTR_BYTES(size); -+ host_ui->xattr_size += CALC_XATTR_BYTES(ui->data_len); -+ mutex_unlock(&host_ui->ui_mutex); -+ make_bad_inode(inode); -+out_free: -+ ubifs_release_budget(c, &req); -+ return err; -+} -+ -+/** -+ * check_namespace - check extended attribute name-space. -+ * @nm: extended attribute name -+ * -+ * This function makes sure the extended attribute name belongs to one of the -+ * supported extended attribute name-spaces. Returns name-space index in case -+ * of success and a negative error code in case of failure. -+ */ -+static int check_namespace(const struct qstr *nm) -+{ -+ int type; -+ -+ if (nm->len > UBIFS_MAX_NLEN) -+ return -ENAMETOOLONG; -+ -+ if (!strncmp(nm->name, XATTR_TRUSTED_PREFIX, -+ XATTR_TRUSTED_PREFIX_LEN)) { -+ if (nm->name[sizeof(XATTR_TRUSTED_PREFIX) - 1] == '\0') -+ return -EINVAL; -+ type = TRUSTED_XATTR; -+ } else if (!strncmp(nm->name, XATTR_USER_PREFIX, -+ XATTR_USER_PREFIX_LEN)) { -+ if (nm->name[XATTR_USER_PREFIX_LEN] == '\0') -+ return -EINVAL; -+ type = USER_XATTR; -+ } else if (!strncmp(nm->name, XATTR_SECURITY_PREFIX, -+ XATTR_SECURITY_PREFIX_LEN)) { -+ if (nm->name[sizeof(XATTR_SECURITY_PREFIX) - 1] == '\0') -+ return -EINVAL; -+ type = SECURITY_XATTR; -+ } else -+ return -EOPNOTSUPP; -+ -+ return type; -+} -+ -+static struct inode *iget_xattr(struct ubifs_info *c, ino_t inum) -+{ -+ struct inode *inode; -+ -+ inode = ubifs_iget(c->vfs_sb, inum); -+ if (IS_ERR(inode)) { -+ ubifs_err("dead extended attribute entry, error %d", -+ (int)PTR_ERR(inode)); -+ return inode; -+ } -+ if (ubifs_inode(inode)->xattr) -+ return inode; -+ ubifs_err("corrupt extended attribute entry"); -+ iput(inode); -+ return ERR_PTR(-EINVAL); -+} -+ -+int ubifs_setxattr(struct dentry *dentry, const char *name, -+ const void 
*value, size_t size, int flags) -+{ -+ struct inode *inode, *host = dentry->d_inode; -+ struct ubifs_info *c = host->i_sb->s_fs_info; -+ struct qstr nm = { .name = name, .len = strlen(name) }; -+ struct ubifs_dent_node *xent; -+ union ubifs_key key; -+ int err, type; -+ -+ dbg_gen("xattr '%s', host ino %lu ('%.*s'), size %zd", name, -+ host->i_ino, dentry->d_name.len, dentry->d_name.name, size); -+ ubifs_assert(mutex_is_locked(&host->i_mutex)); -+ -+ if (size > UBIFS_MAX_INO_DATA) -+ return -ERANGE; -+ -+ type = check_namespace(&nm); -+ if (type < 0) -+ return type; -+ -+ xent = kmalloc(UBIFS_MAX_XENT_NODE_SZ, GFP_NOFS); -+ if (!xent) -+ return -ENOMEM; -+ -+ /* -+ * The extended attribute entries are stored in LNC, so multiple -+ * look-ups do not involve reading the flash. -+ */ -+ xent_key_init(c, &key, host->i_ino, &nm); -+ err = ubifs_tnc_lookup_nm(c, &key, xent, &nm); -+ if (err) { -+ if (err != -ENOENT) -+ goto out_free; -+ -+ if (flags & XATTR_REPLACE) -+ /* We are asked not to create the xattr */ -+ err = -ENODATA; -+ else -+ err = create_xattr(c, host, &nm, value, size); -+ goto out_free; -+ } -+ -+ if (flags & XATTR_CREATE) { -+ /* We are asked not to replace the xattr */ -+ err = -EEXIST; -+ goto out_free; -+ } -+ -+ inode = iget_xattr(c, le64_to_cpu(xent->inum)); -+ if (IS_ERR(inode)) { -+ err = PTR_ERR(inode); -+ goto out_free; -+ } -+ -+ err = change_xattr(c, host, inode, value, size); -+ iput(inode); -+ -+out_free: -+ kfree(xent); -+ return err; -+} -+ -+ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf, -+ size_t size) -+{ -+ struct inode *inode, *host = dentry->d_inode; -+ struct ubifs_info *c = host->i_sb->s_fs_info; -+ struct qstr nm = { .name = name, .len = strlen(name) }; -+ struct ubifs_inode *ui; -+ struct ubifs_dent_node *xent; -+ union ubifs_key key; -+ int err; -+ -+ dbg_gen("xattr '%s', ino %lu ('%.*s'), buf size %zd", name, -+ host->i_ino, dentry->d_name.len, dentry->d_name.name, size); -+ -+ err = 
check_namespace(&nm); -+ if (err < 0) -+ return err; -+ -+ xent = kmalloc(UBIFS_MAX_XENT_NODE_SZ, GFP_NOFS); -+ if (!xent) -+ return -ENOMEM; -+ -+ xent_key_init(c, &key, host->i_ino, &nm); -+ err = ubifs_tnc_lookup_nm(c, &key, xent, &nm); -+ if (err) { -+ if (err == -ENOENT) -+ err = -ENODATA; -+ goto out_unlock; -+ } -+ -+ inode = iget_xattr(c, le64_to_cpu(xent->inum)); -+ if (IS_ERR(inode)) { -+ err = PTR_ERR(inode); -+ goto out_unlock; -+ } -+ -+ ui = ubifs_inode(inode); -+ ubifs_assert(inode->i_size == ui->data_len); -+ ubifs_assert(ubifs_inode(host)->xattr_size > ui->data_len); -+ -+ if (buf) { -+ /* If @buf is %NULL we are supposed to return the length */ -+ if (ui->data_len > size) { -+ dbg_err("buffer size %zd, xattr len %d", -+ size, ui->data_len); -+ err = -ERANGE; -+ goto out_iput; -+ } -+ -+ memcpy(buf, ui->data, ui->data_len); -+ } -+ err = ui->data_len; -+ -+out_iput: -+ iput(inode); -+out_unlock: -+ kfree(xent); -+ return err; -+} -+ -+ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size) -+{ -+ union ubifs_key key; -+ struct inode *host = dentry->d_inode; -+ struct ubifs_info *c = host->i_sb->s_fs_info; -+ struct ubifs_inode *host_ui = ubifs_inode(host); -+ struct ubifs_dent_node *xent, *pxent = NULL; -+ int err, len, written = 0; -+ struct qstr nm = { .name = NULL }; -+ -+ dbg_gen("ino %lu ('%.*s'), buffer size %zd", host->i_ino, -+ dentry->d_name.len, dentry->d_name.name, size); -+ -+ len = host_ui->xattr_names + host_ui->xattr_cnt; -+ if (!buffer) -+ /* -+ * We should return the minimum buffer size which will fit a -+ * null-terminated list of all the extended attribute names. 
-+ */ -+ return len; -+ -+ if (len > size) -+ return -ERANGE; -+ -+ lowest_xent_key(c, &key, host->i_ino); -+ while (1) { -+ int type; -+ -+ xent = ubifs_tnc_next_ent(c, &key, &nm); -+ if (IS_ERR(xent)) { -+ err = PTR_ERR(xent); -+ break; -+ } -+ -+ nm.name = xent->name; -+ nm.len = le16_to_cpu(xent->nlen); -+ -+ type = check_namespace(&nm); -+ if (unlikely(type < 0)) { -+ err = type; -+ break; -+ } -+ -+ /* Show trusted namespace only for "power" users */ -+ if (type != TRUSTED_XATTR || capable(CAP_SYS_ADMIN)) { -+ memcpy(buffer + written, nm.name, nm.len + 1); -+ written += nm.len + 1; -+ } -+ -+ kfree(pxent); -+ pxent = xent; -+ key_read(c, &xent->key, &key); -+ } -+ -+ kfree(pxent); -+ if (err != -ENOENT) { -+ ubifs_err("cannot find next direntry, error %d", err); -+ return err; -+ } -+ -+ ubifs_assert(written <= size); -+ return written; -+} -+ -+static int remove_xattr(struct ubifs_info *c, struct inode *host, -+ struct inode *inode, const struct qstr *nm) -+{ -+ int err; -+ struct ubifs_inode *host_ui = ubifs_inode(host); -+ struct ubifs_inode *ui = ubifs_inode(inode); -+ struct ubifs_budget_req req = { .dirtied_ino = 2, .mod_dent = 1, -+ .dirtied_ino_d = ALIGN(host_ui->data_len, 8) }; -+ -+ ubifs_assert(ui->data_len == inode->i_size); -+ -+ err = ubifs_budget_space(c, &req); -+ if (err) -+ return err; -+ -+ mutex_lock(&host_ui->ui_mutex); -+ host->i_ctime = ubifs_current_time(host); -+ host_ui->xattr_cnt -= 1; -+ host_ui->xattr_size -= CALC_DENT_SIZE(nm->len); -+ host_ui->xattr_size -= CALC_XATTR_BYTES(ui->data_len); -+ host_ui->xattr_names -= nm->len; -+ -+ err = ubifs_jnl_delete_xattr(c, host, inode, nm); -+ if (err) -+ goto out_cancel; -+ mutex_unlock(&host_ui->ui_mutex); -+ -+ ubifs_release_budget(c, &req); -+ return 0; -+ -+out_cancel: -+ host_ui->xattr_cnt += 1; -+ host_ui->xattr_size += CALC_DENT_SIZE(nm->len); -+ host_ui->xattr_size += CALC_XATTR_BYTES(ui->data_len); -+ mutex_unlock(&host_ui->ui_mutex); -+ ubifs_release_budget(c, &req); -+ 
make_bad_inode(inode); -+ return err; -+} -+ -+int ubifs_removexattr(struct dentry *dentry, const char *name) -+{ -+ struct inode *inode, *host = dentry->d_inode; -+ struct ubifs_info *c = host->i_sb->s_fs_info; -+ struct qstr nm = { .name = name, .len = strlen(name) }; -+ struct ubifs_dent_node *xent; -+ union ubifs_key key; -+ int err; -+ -+ dbg_gen("xattr '%s', ino %lu ('%.*s')", name, -+ host->i_ino, dentry->d_name.len, dentry->d_name.name); -+ ubifs_assert(mutex_is_locked(&host->i_mutex)); -+ -+ err = check_namespace(&nm); -+ if (err < 0) -+ return err; -+ -+ xent = kmalloc(UBIFS_MAX_XENT_NODE_SZ, GFP_NOFS); -+ if (!xent) -+ return -ENOMEM; -+ -+ xent_key_init(c, &key, host->i_ino, &nm); -+ err = ubifs_tnc_lookup_nm(c, &key, xent, &nm); -+ if (err) { -+ if (err == -ENOENT) -+ err = -ENODATA; -+ goto out_free; -+ } -+ -+ inode = iget_xattr(c, le64_to_cpu(xent->inum)); -+ if (IS_ERR(inode)) { -+ err = PTR_ERR(inode); -+ goto out_free; -+ } -+ -+ ubifs_assert(inode->i_nlink == 1); -+ inode->i_nlink = 0; -+ err = remove_xattr(c, host, inode, &nm); -+ if (err) -+ inode->i_nlink = 1; -+ -+ /* If @i_nlink is 0, 'iput()' will delete the inode */ -+ iput(inode); -+ -+out_free: -+ kfree(xent); -+ return err; -+} -diff -Nurd linux-2.6.24.orig/include/linux/fs.h linux-2.6.24/include/linux/fs.h ---- linux-2.6.24.orig/include/linux/fs.h 2009-04-17 09:45:11.000000000 +0200 -+++ linux-2.6.24/include/linux/fs.h 2009-04-17 09:49:28.000000000 +0200 -@@ -1672,6 +1672,8 @@ - extern int invalidate_inode_pages2(struct address_space *mapping); - extern int invalidate_inode_pages2_range(struct address_space *mapping, - pgoff_t start, pgoff_t end); -+extern void generic_sync_sb_inodes(struct super_block *sb, -+ struct writeback_control *wbc); - extern int write_inode_now(struct inode *, int); - extern int filemap_fdatawrite(struct address_space *); - extern int filemap_flush(struct address_space *); -diff -Nurd linux-2.6.24.orig/include/linux/mtd/ubi.h 
linux-2.6.24/include/linux/mtd/ubi.h ---- linux-2.6.24.orig/include/linux/mtd/ubi.h 2009-04-17 09:45:11.000000000 +0200 -+++ linux-2.6.24/include/linux/mtd/ubi.h 2009-04-17 09:49:28.000000000 +0200 -@@ -26,23 +26,6 @@ - #include <mtd/ubi-user.h> - - /* -- * UBI data type hint constants. -- * -- * UBI_LONGTERM: long-term data -- * UBI_SHORTTERM: short-term data -- * UBI_UNKNOWN: data persistence is unknown -- * -- * These constants are used when data is written to UBI volumes in order to -- * help the UBI wear-leveling unit to find more appropriate physical -- * eraseblocks. -- */ --enum { -- UBI_LONGTERM = 1, -- UBI_SHORTTERM, -- UBI_UNKNOWN --}; -- --/* - * enum ubi_open_mode - UBI volume open mode constants. - * - * UBI_READONLY: read-only mode -@@ -62,13 +45,13 @@ - * @size: how many physical eraseblocks are reserved for this volume - * @used_bytes: how many bytes of data this volume contains - * @used_ebs: how many physical eraseblocks of this volume actually contain any -- * data -+ * data - * @vol_type: volume type (%UBI_DYNAMIC_VOLUME or %UBI_STATIC_VOLUME) - * @corrupted: non-zero if the volume is corrupted (static volumes only) - * @upd_marker: non-zero if the volume has update marker set - * @alignment: volume alignment - * @usable_leb_size: how many bytes are available in logical eraseblocks of -- * this volume -+ * this volume - * @name_len: volume name length - * @name: volume name - * @cdev: UBI volume character device major and minor numbers -@@ -167,7 +150,9 @@ - int len, int dtype); - int ubi_leb_erase(struct ubi_volume_desc *desc, int lnum); - int ubi_leb_unmap(struct ubi_volume_desc *desc, int lnum); -+int ubi_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype); - int ubi_is_mapped(struct ubi_volume_desc *desc, int lnum); -+int ubi_sync(int ubi_num); - - /* - * This function is the same as the 'ubi_leb_read()' function, but it does not -diff -Nurd linux-2.6.24.orig/include/mtd/Kbuild linux-2.6.24/include/mtd/Kbuild ---- 
linux-2.6.24.orig/include/mtd/Kbuild 2009-04-17 09:45:11.000000000 +0200 -+++ linux-2.6.24/include/mtd/Kbuild 2009-04-17 09:49:28.000000000 +0200 -@@ -3,5 +3,4 @@ - header-y += mtd-abi.h - header-y += mtd-user.h - header-y += nftl-user.h --header-y += ubi-header.h - header-y += ubi-user.h -diff -Nurd linux-2.6.24.orig/include/mtd/ubi-user.h linux-2.6.24/include/mtd/ubi-user.h ---- linux-2.6.24.orig/include/mtd/ubi-user.h 2009-04-17 09:45:11.000000000 +0200 -+++ linux-2.6.24/include/mtd/ubi-user.h 2009-04-17 09:49:28.000000000 +0200 -@@ -22,33 +22,55 @@ - #define __UBI_USER_H__ - - /* -+ * UBI device creation (the same as MTD device attachment) -+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -+ * -+ * MTD devices may be attached using %UBI_IOCATT ioctl command of the UBI -+ * control device. The caller has to properly fill and pass -+ * &struct ubi_attach_req object - UBI will attach the MTD device specified in -+ * the request and return the newly created UBI device number as the ioctl -+ * return value. -+ * -+ * UBI device deletion (the same as MTD device detachment) -+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -+ * -+ * An UBI device maybe deleted with %UBI_IOCDET ioctl command of the UBI -+ * control device. -+ * - * UBI volume creation - * ~~~~~~~~~~~~~~~~~~~ - * -- * UBI volumes are created via the %UBI_IOCMKVOL IOCTL command of UBI character -+ * UBI volumes are created via the %UBI_IOCMKVOL ioctl command of UBI character - * device. A &struct ubi_mkvol_req object has to be properly filled and a -- * pointer to it has to be passed to the IOCTL. -+ * pointer to it has to be passed to the ioctl. - * - * UBI volume deletion - * ~~~~~~~~~~~~~~~~~~~ - * -- * To delete a volume, the %UBI_IOCRMVOL IOCTL command of the UBI character -+ * To delete a volume, the %UBI_IOCRMVOL ioctl command of the UBI character - * device should be used. A pointer to the 32-bit volume ID hast to be passed -- * to the IOCTL. -+ * to the ioctl. 
- * - * UBI volume re-size - * ~~~~~~~~~~~~~~~~~~ - * -- * To re-size a volume, the %UBI_IOCRSVOL IOCTL command of the UBI character -+ * To re-size a volume, the %UBI_IOCRSVOL ioctl command of the UBI character - * device should be used. A &struct ubi_rsvol_req object has to be properly -- * filled and a pointer to it has to be passed to the IOCTL. -+ * filled and a pointer to it has to be passed to the ioctl. -+ * -+ * UBI volumes re-name -+ * ~~~~~~~~~~~~~~~~~~~ -+ * -+ * To re-name several volumes atomically at one go, the %UBI_IOCRNVOL command -+ * of the UBI character device should be used. A &struct ubi_rnvol_req object -+ * has to be properly filled and a pointer to it has to be passed to the ioctl. - * - * UBI volume update - * ~~~~~~~~~~~~~~~~~ - * -- * Volume update should be done via the %UBI_IOCVOLUP IOCTL command of the -+ * Volume update should be done via the %UBI_IOCVOLUP ioctl command of the - * corresponding UBI volume character device. A pointer to a 64-bit update -- * size should be passed to the IOCTL. After then, UBI expects user to write -+ * size should be passed to the ioctl. After this, UBI expects user to write - * this number of bytes to the volume character device. The update is finished - * when the claimed number of bytes is passed. So, the volume update sequence - * is something like: -@@ -57,19 +79,65 @@ - * ioctl(fd, UBI_IOCVOLUP, &image_size); - * write(fd, buf, image_size); - * close(fd); -+ * -+ * Logical eraseblock erase -+ * ~~~~~~~~~~~~~~~~~~~~~~~~ -+ * -+ * To erase a logical eraseblock, the %UBI_IOCEBER ioctl command of the -+ * corresponding UBI volume character device should be used. This command -+ * unmaps the requested logical eraseblock, makes sure the corresponding -+ * physical eraseblock is successfully erased, and returns. 
-+ * -+ * Atomic logical eraseblock change -+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -+ * -+ * Atomic logical eraseblock change operation is called using the %UBI_IOCEBCH -+ * ioctl command of the corresponding UBI volume character device. A pointer to -+ * a &struct ubi_leb_change_req object has to be passed to the ioctl. Then the -+ * user is expected to write the requested amount of bytes (similarly to what -+ * should be done in case of the "volume update" ioctl). -+ * -+ * Logical eraseblock map -+ * ~~~~~~~~~~~~~~~~~~~~~ -+ * -+ * To map a logical eraseblock to a physical eraseblock, the %UBI_IOCEBMAP -+ * ioctl command should be used. A pointer to a &struct ubi_map_req object is -+ * expected to be passed. The ioctl maps the requested logical eraseblock to -+ * a physical eraseblock and returns. Only non-mapped logical eraseblocks can -+ * be mapped. If the logical eraseblock specified in the request is already -+ * mapped to a physical eraseblock, the ioctl fails and returns error. -+ * -+ * Logical eraseblock unmap -+ * ~~~~~~~~~~~~~~~~~~~~~~~~ -+ * -+ * To unmap a logical eraseblock to a physical eraseblock, the %UBI_IOCEBUNMAP -+ * ioctl command should be used. The ioctl unmaps the logical eraseblocks, -+ * schedules corresponding physical eraseblock for erasure, and returns. Unlike -+ * the "LEB erase" command, it does not wait for the physical eraseblock being -+ * erased. Note, the side effect of this is that if an unclean reboot happens -+ * after the unmap ioctl returns, you may find the LEB mapped again to the same -+ * physical eraseblock after the UBI is run again. -+ * -+ * Check if logical eraseblock is mapped -+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -+ * -+ * To check if a logical eraseblock is mapped to a physical eraseblock, the -+ * %UBI_IOCEBISMAP ioctl command should be used. It returns %0 if the LEB is -+ * not mapped, and %1 if it is mapped. 
- */ - - /* -- * When a new volume is created, users may either specify the volume number they -- * want to create or to let UBI automatically assign a volume number using this -- * constant. -+ * When a new UBI volume or UBI device is created, users may either specify the -+ * volume/device number they want to create or to let UBI automatically assign -+ * the number using these constants. - */ - #define UBI_VOL_NUM_AUTO (-1) -+#define UBI_DEV_NUM_AUTO (-1) - - /* Maximum volume name length */ - #define UBI_MAX_VOLUME_NAME 127 - --/* IOCTL commands of UBI character devices */ -+/* ioctl commands of UBI character devices */ - - #define UBI_IOC_MAGIC 'o' - -@@ -79,15 +147,57 @@ - #define UBI_IOCRMVOL _IOW(UBI_IOC_MAGIC, 1, int32_t) - /* Re-size an UBI volume */ - #define UBI_IOCRSVOL _IOW(UBI_IOC_MAGIC, 2, struct ubi_rsvol_req) -+/* Re-name volumes */ -+#define UBI_IOCRNVOL _IOW(UBI_IOC_MAGIC, 3, struct ubi_rnvol_req) - --/* IOCTL commands of UBI volume character devices */ -+/* ioctl commands of the UBI control character device */ -+ -+#define UBI_CTRL_IOC_MAGIC 'o' -+ -+/* Attach an MTD device */ -+#define UBI_IOCATT _IOW(UBI_CTRL_IOC_MAGIC, 64, struct ubi_attach_req) -+/* Detach an MTD device */ -+#define UBI_IOCDET _IOW(UBI_CTRL_IOC_MAGIC, 65, int32_t) -+ -+/* ioctl commands of UBI volume character devices */ - - #define UBI_VOL_IOC_MAGIC 'O' - - /* Start UBI volume update */ - #define UBI_IOCVOLUP _IOW(UBI_VOL_IOC_MAGIC, 0, int64_t) --/* An eraseblock erasure command, used for debugging, disabled by default */ -+/* LEB erasure command, used for debugging, disabled by default */ - #define UBI_IOCEBER _IOW(UBI_VOL_IOC_MAGIC, 1, int32_t) -+/* Atomic LEB change command */ -+#define UBI_IOCEBCH _IOW(UBI_VOL_IOC_MAGIC, 2, int32_t) -+/* Map LEB command */ -+#define UBI_IOCEBMAP _IOW(UBI_VOL_IOC_MAGIC, 3, struct ubi_map_req) -+/* Unmap LEB command */ -+#define UBI_IOCEBUNMAP _IOW(UBI_VOL_IOC_MAGIC, 4, int32_t) -+/* Check if LEB is mapped command */ -+#define 
UBI_IOCEBISMAP _IOR(UBI_VOL_IOC_MAGIC, 5, int32_t) -+ -+/* Maximum MTD device name length supported by UBI */ -+#define MAX_UBI_MTD_NAME_LEN 127 -+ -+/* Maximum amount of UBI volumes that can be re-named at one go */ -+#define UBI_MAX_RNVOL 32 -+ -+/* -+ * UBI data type hint constants. -+ * -+ * UBI_LONGTERM: long-term data -+ * UBI_SHORTTERM: short-term data -+ * UBI_UNKNOWN: data persistence is unknown -+ * -+ * These constants are used when data is written to UBI volumes in order to -+ * help the UBI wear-leveling unit to find more appropriate physical -+ * eraseblocks. -+ */ -+enum { -+ UBI_LONGTERM = 1, -+ UBI_SHORTTERM = 2, -+ UBI_UNKNOWN = 3, -+}; - - /* - * UBI volume type constants. -@@ -97,22 +207,58 @@ - */ - enum { - UBI_DYNAMIC_VOLUME = 3, -- UBI_STATIC_VOLUME = 4 -+ UBI_STATIC_VOLUME = 4, -+}; -+ -+/** -+ * struct ubi_attach_req - attach MTD device request. -+ * @ubi_num: UBI device number to create -+ * @mtd_num: MTD device number to attach -+ * @vid_hdr_offset: VID header offset (use defaults if %0) -+ * @padding: reserved for future, not used, has to be zeroed -+ * -+ * This data structure is used to specify MTD device UBI has to attach and the -+ * parameters it has to use. The number which should be assigned to the new UBI -+ * device is passed in @ubi_num. UBI may automatically assign the number if -+ * @UBI_DEV_NUM_AUTO is passed. In this case, the device number is returned in -+ * @ubi_num. -+ * -+ * Most applications should pass %0 in @vid_hdr_offset to make UBI use default -+ * offset of the VID header within physical eraseblocks. The default offset is -+ * the next min. I/O unit after the EC header. For example, it will be offset -+ * 512 in case of a 512 bytes page NAND flash with no sub-page support. Or -+ * it will be 512 in case of a 2KiB page NAND flash with 4 512-byte sub-pages. -+ * -+ * But in rare cases, if this optimizes things, the VID header may be placed to -+ * a different offset. 
For example, the boot-loader might do things faster if -+ * the VID header sits at the end of the first 2KiB NAND page with 4 sub-pages. -+ * As the boot-loader would not normally need to read EC headers (unless it -+ * needs UBI in RW mode), it might be faster to calculate ECC. This is weird -+ * example, but it real-life example. So, in this example, @vid_hdr_offer would -+ * be 2KiB-64 bytes = 1984. Note, that this position is not even 512-bytes -+ * aligned, which is OK, as UBI is clever enough to realize this is 4th -+ * sub-page of the first page and add needed padding. -+ */ -+struct ubi_attach_req { -+ int32_t ubi_num; -+ int32_t mtd_num; -+ int32_t vid_hdr_offset; -+ int8_t padding[12]; - }; - - /** - * struct ubi_mkvol_req - volume description data structure used in -- * volume creation requests. -+ * volume creation requests. - * @vol_id: volume number - * @alignment: volume alignment - * @bytes: volume size in bytes - * @vol_type: volume type (%UBI_DYNAMIC_VOLUME or %UBI_STATIC_VOLUME) -- * @padding1: reserved for future, not used -+ * @padding1: reserved for future, not used, has to be zeroed - * @name_len: volume name length -- * @padding2: reserved for future, not used -+ * @padding2: reserved for future, not used, has to be zeroed - * @name: volume name - * -- * This structure is used by userspace programs when creating new volumes. The -+ * This structure is used by user-space programs when creating new volumes. The - * @used_bytes field is only necessary when creating static volumes. - * - * The @alignment field specifies the required alignment of the volume logical -@@ -139,7 +285,7 @@ - int8_t padding1; - int16_t name_len; - int8_t padding2[4]; -- char name[UBI_MAX_VOLUME_NAME+1]; -+ char name[UBI_MAX_VOLUME_NAME + 1]; - } __attribute__ ((packed)); - - /** -@@ -158,4 +304,73 @@ - int32_t vol_id; - } __attribute__ ((packed)); - -+/** -+ * struct ubi_rnvol_req - volumes re-name request. 
-+ * @count: count of volumes to re-name -+ * @padding1: reserved for future, not used, has to be zeroed -+ * @vol_id: ID of the volume to re-name -+ * @name_len: name length -+ * @padding2: reserved for future, not used, has to be zeroed -+ * @name: new volume name -+ * -+ * UBI allows to re-name up to %32 volumes at one go. The count of volumes to -+ * re-name is specified in the @count field. The ID of the volumes to re-name -+ * and the new names are specified in the @vol_id and @name fields. -+ * -+ * The UBI volume re-name operation is atomic, which means that should power cut -+ * happen, the volumes will have either old name or new name. So the possible -+ * use-cases of this command is atomic upgrade. Indeed, to upgrade, say, volumes -+ * A and B one may create temporary volumes %A1 and %B1 with the new contents, -+ * then atomically re-name A1->A and B1->B, in which case old %A and %B will -+ * be removed. -+ * -+ * If it is not desirable to remove old A and B, the re-name request has to -+ * contain 4 entries: A1->A, A->A1, B1->B, B->B1, in which case old A1 and B1 -+ * become A and B, and old A and B will become A1 and B1. -+ * -+ * It is also OK to request: A1->A, A1->X, B1->B, B->Y, in which case old A1 -+ * and B1 become A and B, and old A and B become X and Y. -+ * -+ * In other words, in case of re-naming into an existing volume name, the -+ * existing volume is removed, unless it is re-named as well at the same -+ * re-name request. -+ */ -+struct ubi_rnvol_req { -+ int32_t count; -+ int8_t padding1[12]; -+ struct { -+ int32_t vol_id; -+ int16_t name_len; -+ int8_t padding2[2]; -+ char name[UBI_MAX_VOLUME_NAME + 1]; -+ } ents[UBI_MAX_RNVOL]; -+} __attribute__ ((packed)); -+ -+/** -+ * struct ubi_leb_change_req - a data structure used in atomic LEB change -+ * requests. 
-+ * @lnum: logical eraseblock number to change -+ * @bytes: how many bytes will be written to the logical eraseblock -+ * @dtype: data type (%UBI_LONGTERM, %UBI_SHORTTERM, %UBI_UNKNOWN) -+ * @padding: reserved for future, not used, has to be zeroed -+ */ -+struct ubi_leb_change_req { -+ int32_t lnum; -+ int32_t bytes; -+ int8_t dtype; -+ int8_t padding[7]; -+} __attribute__ ((packed)); -+ -+/** -+ * struct ubi_map_req - a data structure used in map LEB requests. -+ * @lnum: logical eraseblock number to unmap -+ * @dtype: data type (%UBI_LONGTERM, %UBI_SHORTTERM, %UBI_UNKNOWN) -+ * @padding: reserved for future, not used, has to be zeroed -+ */ -+struct ubi_map_req { -+ int32_t lnum; -+ int8_t dtype; -+ int8_t padding[3]; -+} __attribute__ ((packed)); -+ - #endif /* __UBI_USER_H__ */ diff --git a/recipes/linux/linux-2.6.24/ubifs-v2.6.24.patch b/recipes/linux/linux-2.6.24/ubifs-v2.6.24.patch new file mode 100644 index 0000000000..952e989df4 --- /dev/null +++ b/recipes/linux/linux-2.6.24/ubifs-v2.6.24.patch @@ -0,0 +1,45442 @@ +diff -Nurd linux-2.6.24/Documentation/ABI/stable/sysfs-class-ubi ubifs-v2.6.24/Documentation/ABI/stable/sysfs-class-ubi +--- linux-2.6.24/Documentation/ABI/stable/sysfs-class-ubi 1970-01-01 02:00:00.000000000 +0200 ++++ ubifs-v2.6.24/Documentation/ABI/stable/sysfs-class-ubi 2009-04-07 17:14:47.000000000 +0200 +@@ -0,0 +1,212 @@ ++What: /sys/class/ubi/ ++Date: July 2006 ++KernelVersion: 2.6.22 ++Contact: Artem Bityutskiy <dedekind@infradead.org> ++Description: ++ The ubi/ class sub-directory belongs to the UBI subsystem and ++ provides general UBI information, per-UBI device information ++ and per-UBI volume information. ++ ++What: /sys/class/ubi/version ++Date: July 2006 ++KernelVersion: 2.6.22 ++Contact: Artem Bityutskiy <dedekind@infradead.org> ++Description: ++ This file contains version of the latest supported UBI on-media ++ format. Currently it is 1, and there is no plan to change this. 
++ However, if in the future UBI needs on-flash format changes ++ which cannot be done in a compatible manner, a new format ++ version will be added. So this is a mechanism for possible ++ future backward-compatible (but forward-incompatible) ++ improvements. ++ ++What: /sys/class/ubiX/ ++Date: July 2006 ++KernelVersion: 2.6.22 ++Contact: Artem Bityutskiy <dedekind@infradead.org> ++Description: ++ The /sys/class/ubi0, /sys/class/ubi1, etc directories describe ++ UBI devices (UBI device 0, 1, etc). They contain general UBI ++ device information and per UBI volume information (each UBI ++ device may have many UBI volumes) ++ ++What: /sys/class/ubi/ubiX/avail_eraseblocks ++Date: July 2006 ++KernelVersion: 2.6.22 ++Contact: Artem Bityutskiy <dedekind@infradead.org> ++Description: ++ Amount of available logical eraseblock. For example, one may ++ create a new UBI volume which has this amount of logical ++ eraseblocks. ++ ++What: /sys/class/ubi/ubiX/bad_peb_count ++Date: July 2006 ++KernelVersion: 2.6.22 ++Contact: Artem Bityutskiy <dedekind@infradead.org> ++Description: ++ Count of bad physical eraseblocks on the underlying MTD device. ++ ++What: /sys/class/ubi/ubiX/bgt_enabled ++Date: July 2006 ++KernelVersion: 2.6.22 ++Contact: Artem Bityutskiy <dedekind@infradead.org> ++Description: ++ Contains ASCII "0\n" if the UBI background thread is disabled, ++ and ASCII "1\n" if it is enabled. ++ ++What: /sys/class/ubi/ubiX/dev ++Date: July 2006 ++KernelVersion: 2.6.22 ++Contact: Artem Bityutskiy <dedekind@infradead.org> ++Description: ++ Major and minor numbers of the character device corresponding ++ to this UBI device (in <major>:<minor> format). ++ ++What: /sys/class/ubi/ubiX/eraseblock_size ++Date: July 2006 ++KernelVersion: 2.6.22 ++Contact: Artem Bityutskiy <dedekind@infradead.org> ++Description: ++ Maximum logical eraseblock size this UBI device may provide. UBI ++ volumes may have smaller logical eraseblock size because of their ++ alignment. 
++ ++What: /sys/class/ubi/ubiX/max_ec ++Date: July 2006 ++KernelVersion: 2.6.22 ++Contact: Artem Bityutskiy <dedekind@infradead.org> ++Description: ++ Maximum physical eraseblock erase counter value. ++ ++What: /sys/class/ubi/ubiX/max_vol_count ++Date: July 2006 ++KernelVersion: 2.6.22 ++Contact: Artem Bityutskiy <dedekind@infradead.org> ++Description: ++ Maximum number of volumes which this UBI device may have. ++ ++What: /sys/class/ubi/ubiX/min_io_size ++Date: July 2006 ++KernelVersion: 2.6.22 ++Contact: Artem Bityutskiy <dedekind@infradead.org> ++Description: ++ Minimum input/output unit size. All the I/O may only be done ++ in fractions of the contained number. ++ ++What: /sys/class/ubi/ubiX/mtd_num ++Date: January 2008 ++KernelVersion: 2.6.25 ++Contact: Artem Bityutskiy <dedekind@infradead.org> ++Description: ++ Number of the underlying MTD device. ++ ++What: /sys/class/ubi/ubiX/reserved_for_bad ++Date: July 2006 ++KernelVersion: 2.6.22 ++Contact: Artem Bityutskiy <dedekind@infradead.org> ++Description: ++ Number of physical eraseblocks reserved for bad block handling. ++ ++What: /sys/class/ubi/ubiX/total_eraseblocks ++Date: July 2006 ++KernelVersion: 2.6.22 ++Contact: Artem Bityutskiy <dedekind@infradead.org> ++Description: ++ Total number of good (not marked as bad) physical eraseblocks on ++ the underlying MTD device. ++ ++What: /sys/class/ubi/ubiX/volumes_count ++Date: July 2006 ++KernelVersion: 2.6.22 ++Contact: Artem Bityutskiy <dedekind@infradead.org> ++Description: ++ Count of volumes on this UBI device. ++ ++What: /sys/class/ubi/ubiX/ubiX_Y/ ++Date: July 2006 ++KernelVersion: 2.6.22 ++Contact: Artem Bityutskiy <dedekind@infradead.org> ++Description: ++ The /sys/class/ubi/ubiX/ubiX_0/, /sys/class/ubi/ubiX/ubiX_1/, ++ etc directories describe UBI volumes on UBI device X (volumes ++ 0, 1, etc). 
++ ++What: /sys/class/ubi/ubiX/ubiX_Y/alignment ++Date: July 2006 ++KernelVersion: 2.6.22 ++Contact: Artem Bityutskiy <dedekind@infradead.org> ++Description: ++ Volume alignment - the value the logical eraseblock size of ++ this volume has to be aligned on. For example, 2048 means that ++ logical eraseblock size is a multiple of 2048. In other words, ++ volume logical eraseblock size is UBI device logical eraseblock ++ size aligned to the alignment value. ++ ++What: /sys/class/ubi/ubiX/ubiX_Y/corrupted ++Date: July 2006 ++KernelVersion: 2.6.22 ++Contact: Artem Bityutskiy <dedekind@infradead.org> ++Description: ++ Contains ASCII "0\n" if the UBI volume is OK, and ASCII "1\n" ++ if it is corrupted (e.g., due to an interrupted volume update). ++ ++What: /sys/class/ubi/ubiX/ubiX_Y/data_bytes ++Date: July 2006 ++KernelVersion: 2.6.22 ++Contact: Artem Bityutskiy <dedekind@infradead.org> ++Description: ++ The amount of data this volume contains. This value makes sense ++ only for static volumes, and for dynamic volumes it is equivalent ++ to the total volume size in bytes. ++ ++What: /sys/class/ubi/ubiX/ubiX_Y/dev ++Date: July 2006 ++KernelVersion: 2.6.22 ++Contact: Artem Bityutskiy <dedekind@infradead.org> ++Description: ++ Major and minor numbers of the character device corresponding ++ to this UBI volume (in <major>:<minor> format). ++ ++What: /sys/class/ubi/ubiX/ubiX_Y/name ++Date: July 2006 ++KernelVersion: 2.6.22 ++Contact: Artem Bityutskiy <dedekind@infradead.org> ++Description: ++ Volume name. ++ ++What: /sys/class/ubi/ubiX/ubiX_Y/reserved_ebs ++Date: July 2006 ++KernelVersion: 2.6.22 ++Contact: Artem Bityutskiy <dedekind@infradead.org> ++Description: ++ Count of physical eraseblocks reserved for this volume. ++ Equivalent to the volume size in logical eraseblocks. ++ ++What: /sys/class/ubi/ubiX/ubiX_Y/type ++Date: July 2006 ++KernelVersion: 2.6.22 ++Contact: Artem Bityutskiy <dedekind@infradead.org> ++Description: ++ Volume type.
Contains ASCII "dynamic\n" for dynamic volumes and ++ "static\n" for static volumes. ++ ++What: /sys/class/ubi/ubiX/ubiX_Y/upd_marker ++Date: July 2006 ++KernelVersion: 2.6.22 ++Contact: Artem Bityutskiy <dedekind@infradead.org> ++Description: ++ Contains ASCII "0\n" if the update marker is not set for this ++ volume, and "1\n" if it is set. The update marker is set when ++ volume update starts, and cleaned when it ends. So the presence ++ of the update marker indicates that the volume is being updated ++ at the moment or the update was interrupted. The latter may be ++ checked using the "corrupted" sysfs file. ++ ++What: /sys/class/ubi/ubiX/ubiX_Y/usable_eb_size ++Date: July 2006 ++KernelVersion: 2.6.22 ++Contact: Artem Bityutskiy <dedekind@infradead.org> ++Description: ++ Logical eraseblock size of this volume. Equivalent to logical ++ eraseblock size of the device aligned on the volume alignment ++ value. +diff -Nurd linux-2.6.24/Documentation/filesystems/ubifs.txt ubifs-v2.6.24/Documentation/filesystems/ubifs.txt +--- linux-2.6.24/Documentation/filesystems/ubifs.txt 1970-01-01 02:00:00.000000000 +0200 ++++ ubifs-v2.6.24/Documentation/filesystems/ubifs.txt 2009-04-07 17:14:47.000000000 +0200 +@@ -0,0 +1,169 @@ ++Introduction ++============= ++ ++UBIFS file-system stands for UBI File System. UBI stands for "Unsorted ++Block Images". UBIFS is a flash file system, which means it is designed ++to work with flash devices. It is important to understand, that UBIFS ++is completely different to any traditional file-system in Linux, like ++Ext2, XFS, JFS, etc. UBIFS represents a separate class of file-systems ++which work with MTD devices, not block devices. The other Linux ++file-system of this class is JFFS2. ++ ++To make it more clear, here is a small comparison of MTD devices and ++block devices. ++ ++1 MTD devices represent flash devices and they consist of eraseblocks of ++ rather large size, typically about 128KiB.
Block devices consist of ++ small blocks, typically 512 bytes. ++2 MTD devices support 3 main operations - read from some offset within an ++ eraseblock, write to some offset within an eraseblock, and erase a whole ++ eraseblock. Block devices support 2 main operations - read a whole ++ block and write a whole block. ++3 The whole eraseblock has to be erased before it becomes possible to ++ re-write its contents. Blocks may be just re-written. ++4 Eraseblocks become worn out after some number of erase cycles - ++ typically 100K-1G for SLC NAND and NOR flashes, and 1K-10K for MLC ++ NAND flashes. Blocks do not have the wear-out property. ++5 Eraseblocks may become bad (only on NAND flashes) and software should ++ deal with this. Blocks on hard drives typically do not become bad, ++ because hardware has mechanisms to substitute bad blocks, at least in ++ modern LBA disks. ++ ++It should be quite obvious why UBIFS is very different to traditional ++file-systems. ++ ++UBIFS works on top of UBI. UBI is a separate software layer which may be ++found in drivers/mtd/ubi. UBI is basically a volume management and ++wear-leveling layer. It provides so called UBI volumes which is a higher ++level abstraction than a MTD device. The programming model of UBI devices ++is very similar to MTD devices - they still consist of large eraseblocks, ++they have read/write/erase operations, but UBI devices are devoid of ++limitations like wear and bad blocks (items 4 and 5 in the above list). ++ ++In a sense, UBIFS is a next generation of JFFS2 file-system, but it is ++very different and incompatible to JFFS2. The following are the main ++differences. ++ ++* JFFS2 works on top of MTD devices, UBIFS depends on UBI and works on ++ top of UBI volumes. ++* JFFS2 does not have on-media index and has to build it while mounting, ++ which requires full media scan. 
UBIFS maintains the FS indexing ++ information on the flash media and does not require full media scan, ++ so it mounts many times faster than JFFS2. ++* JFFS2 is a write-through file-system, while UBIFS supports write-back, ++ which makes UBIFS much faster on writes. ++ ++Similarly to JFFS2, UBIFS supports on-the-fly compression which makes ++it possible to fit quite a lot of data to the flash. ++ ++Similarly to JFFS2, UBIFS is tolerant of unclean reboots and power-cuts. ++It does not need stuff like fsck.ext2. UBIFS automatically replays its ++journal and recovers from crashes, ensuring that the on-flash data ++structures are consistent. ++ ++UBIFS scales logarithmically (most of the data structures it uses are ++trees), so the mount time and memory consumption do not linearly depend ++on the flash size, like in case of JFFS2. This is because UBIFS ++maintains the FS index on the flash media. However, UBIFS depends on ++UBI, which scales linearly. So overall UBI/UBIFS stack scales linearly. ++Nevertheless, UBI/UBIFS scales considerably better than JFFS2. ++ ++The authors of UBIFS believe that it is possible to develop UBI2 which ++would scale logarithmically as well. UBI2 would support the same API as UBI, ++but it would be binary incompatible to UBI. So UBIFS would not need to be ++changed to use UBI2. ++ ++ ++Mount options ++============= ++ ++(*) == default. ++ ++bulk_read read more in one go to take advantage of flash ++ media that read faster sequentially ++no_bulk_read (*) do not bulk-read ++no_chk_data_crc skip checking of CRCs on data nodes in order to ++ improve read performance. Use this option only ++ if the flash media is highly reliable. The effect ++ of this option is that corruption of the contents ++ of a file can go unnoticed.
++chk_data_crc (*) do not skip checking CRCs on data nodes ++compr=none override default compressor and set it to "none" ++compr=lzo override default compressor and set it to "lzo" ++compr=zlib override default compressor and set it to "zlib" ++ ++ ++Quick usage instructions ++======================== ++ ++The UBI volume to mount is specified using "ubiX_Y" or "ubiX:NAME" syntax, ++where "X" is UBI device number, "Y" is UBI volume number, and "NAME" is ++UBI volume name. ++ ++Mount volume 0 on UBI device 0 to /mnt/ubifs: ++$ mount -t ubifs ubi0_0 /mnt/ubifs ++ ++Mount "rootfs" volume of UBI device 0 to /mnt/ubifs ("rootfs" is volume ++name): ++$ mount -t ubifs ubi0:rootfs /mnt/ubifs ++ ++The following is an example of the kernel boot arguments to attach mtd0 ++to UBI and mount volume "rootfs": ++ubi.mtd=0 root=ubi0:rootfs rootfstype=ubifs ++ ++ ++Module Parameters for Debugging ++=============================== ++ ++When UBIFS has been compiled with debugging enabled, there are 3 module ++parameters that are available to control aspects of testing and debugging. ++The parameters are unsigned integers where each bit controls an option. 
++The parameters are: ++ ++debug_msgs Selects which debug messages to display, as follows: ++ ++ Message Type Flag value ++ ++ General messages 1 ++ Journal messages 2 ++ Mount messages 4 ++ Commit messages 8 ++ LEB search messages 16 ++ Budgeting messages 32 ++ Garbage collection messages 64 ++ Tree Node Cache (TNC) messages 128 ++ LEB properties (lprops) messages 256 ++ Input/output messages 512 ++ Log messages 1024 ++ Scan messages 2048 ++ Recovery messages 4096 ++ ++debug_chks Selects extra checks that UBIFS can do while running: ++ ++ Check Flag value ++ ++ General checks 1 ++ Check Tree Node Cache (TNC) 2 ++ Check indexing tree size 4 ++ Check orphan area 8 ++ Check old indexing tree 16 ++ Check LEB properties (lprops) 32 ++ Check leaf nodes and inodes 64 ++ ++debug_tsts Selects a mode of testing, as follows: ++ ++ Test mode Flag value ++ ++ Force in-the-gaps method 2 ++ Failure mode for recovery testing 4 ++ ++For example, set debug_msgs to 5 to display General messages and Mount ++messages. 
++ ++ ++References ++========== ++ ++UBIFS documentation and FAQ/HOWTO at the MTD web site: ++http://www.linux-mtd.infradead.org/doc/ubifs.html ++http://www.linux-mtd.infradead.org/faq/ubifs.html +diff -Nurd linux-2.6.24/MAINTAINERS ubifs-v2.6.24/MAINTAINERS +--- linux-2.6.24/MAINTAINERS 2008-01-25 00:58:37.000000000 +0200 ++++ ubifs-v2.6.24/MAINTAINERS 2009-04-07 17:14:47.000000000 +0200 +@@ -2148,6 +2148,15 @@ + W: http://www.linux-mtd.infradead.org/doc/jffs2.html + S: Maintained + ++UBI FILE SYSTEM (UBIFS) ++P: Artem Bityutskiy ++M: dedekind@infradead.org ++P: Adrian Hunter ++M: ext-adrian.hunter@nokia.com ++L: linux-mtd@lists.infradead.org ++W: http://www.linux-mtd.infradead.org/doc/ubifs.html ++S: Maintained ++ + JFS FILESYSTEM + P: Dave Kleikamp + M: shaggy@austin.ibm.com +@@ -2547,7 +2556,7 @@ + M: dedekind@infradead.org + W: http://www.linux-mtd.infradead.org/ + L: linux-mtd@lists.infradead.org +-T: git git://git.infradead.org/~dedekind/ubi-2.6.git ++T: git git://git.infradead.org/ubi-2.6.git + S: Maintained + + MICROTEK X6 SCANNER +diff -Nurd linux-2.6.24/crypto/Kconfig ubifs-v2.6.24/crypto/Kconfig +--- linux-2.6.24/crypto/Kconfig 2008-01-25 00:58:37.000000000 +0200 ++++ ubifs-v2.6.24/crypto/Kconfig 2009-04-07 17:14:47.000000000 +0200 +@@ -502,6 +502,14 @@ + Authenc: Combined mode wrapper for IPsec. + This is required for IPSec. + ++config CRYPTO_LZO ++ tristate "LZO compression algorithm" ++ select CRYPTO_ALGAPI ++ select LZO_COMPRESS ++ select LZO_DECOMPRESS ++ help ++ This is the LZO algorithm. 
++ + source "drivers/crypto/Kconfig" + + endif # if CRYPTO +diff -Nurd linux-2.6.24/crypto/Makefile ubifs-v2.6.24/crypto/Makefile +--- linux-2.6.24/crypto/Makefile 2008-01-25 00:58:37.000000000 +0200 ++++ ubifs-v2.6.24/crypto/Makefile 2009-04-07 17:14:47.000000000 +0200 +@@ -51,6 +51,7 @@ + obj-$(CONFIG_CRYPTO_DEFLATE) += deflate.o + obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o + obj-$(CONFIG_CRYPTO_CRC32C) += crc32c.o ++obj-$(CONFIG_CRYPTO_LZO) += lzo.o + obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o + + obj-$(CONFIG_CRYPTO_TEST) += tcrypt.o +diff -Nurd linux-2.6.24/crypto/lzo.c ubifs-v2.6.24/crypto/lzo.c +--- linux-2.6.24/crypto/lzo.c 1970-01-01 02:00:00.000000000 +0200 ++++ ubifs-v2.6.24/crypto/lzo.c 2009-04-07 17:14:47.000000000 +0200 +@@ -0,0 +1,106 @@ ++/* ++ * Cryptographic API. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published by ++ * the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. 
++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, write to the Free Software Foundation, Inc., 51 ++ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ * ++ */ ++ ++#include <linux/init.h> ++#include <linux/module.h> ++#include <linux/crypto.h> ++#include <linux/vmalloc.h> ++#include <linux/lzo.h> ++ ++struct lzo_ctx { ++ void *lzo_comp_mem; ++}; ++ ++static int lzo_init(struct crypto_tfm *tfm) ++{ ++ struct lzo_ctx *ctx = crypto_tfm_ctx(tfm); ++ ++ ctx->lzo_comp_mem = vmalloc(LZO1X_MEM_COMPRESS); ++ if (!ctx->lzo_comp_mem) ++ return -ENOMEM; ++ ++ return 0; ++} ++ ++static void lzo_exit(struct crypto_tfm *tfm) ++{ ++ struct lzo_ctx *ctx = crypto_tfm_ctx(tfm); ++ ++ vfree(ctx->lzo_comp_mem); ++} ++ ++static int lzo_compress(struct crypto_tfm *tfm, const u8 *src, ++ unsigned int slen, u8 *dst, unsigned int *dlen) ++{ ++ struct lzo_ctx *ctx = crypto_tfm_ctx(tfm); ++ size_t tmp_len = *dlen; /* size_t(ulong) <-> uint on 64 bit */ ++ int err; ++ ++ err = lzo1x_1_compress(src, slen, dst, &tmp_len, ctx->lzo_comp_mem); ++ ++ if (err != LZO_E_OK) ++ return -EINVAL; ++ ++ *dlen = tmp_len; ++ return 0; ++} ++ ++static int lzo_decompress(struct crypto_tfm *tfm, const u8 *src, ++ unsigned int slen, u8 *dst, unsigned int *dlen) ++{ ++ int err; ++ size_t tmp_len = *dlen; /* size_t(ulong) <-> uint on 64 bit */ ++ ++ err = lzo1x_decompress_safe(src, slen, dst, &tmp_len); ++ ++ if (err != LZO_E_OK) ++ return -EINVAL; ++ ++ *dlen = tmp_len; ++ return 0; ++ ++} ++ ++static struct crypto_alg alg = { ++ .cra_name = "lzo", ++ .cra_flags = CRYPTO_ALG_TYPE_COMPRESS, ++ .cra_ctxsize = sizeof(struct lzo_ctx), ++ .cra_module = THIS_MODULE, ++ .cra_list = LIST_HEAD_INIT(alg.cra_list), ++ .cra_init = lzo_init, ++ .cra_exit = lzo_exit, ++ .cra_u = { .compress = { ++ .coa_compress = lzo_compress, ++ .coa_decompress = lzo_decompress } } ++}; ++ ++static int __init init(void) ++{ ++ return crypto_register_alg(&alg); ++} ++ 
++static void __exit fini(void) ++{ ++ crypto_unregister_alg(&alg); ++} ++ ++module_init(init); ++module_exit(fini); ++ ++MODULE_LICENSE("GPL"); ++MODULE_DESCRIPTION("LZO Compression Algorithm"); +diff -Nurd linux-2.6.24/crypto/tcrypt.c ubifs-v2.6.24/crypto/tcrypt.c +--- linux-2.6.24/crypto/tcrypt.c 2008-01-25 00:58:37.000000000 +0200 ++++ ubifs-v2.6.24/crypto/tcrypt.c 2009-04-07 17:14:47.000000000 +0200 +@@ -78,7 +78,7 @@ + "twofish", "serpent", "sha384", "sha512", "md4", "aes", "cast6", + "arc4", "michael_mic", "deflate", "crc32c", "tea", "xtea", + "khazad", "wp512", "wp384", "wp256", "tnepres", "xeta", "fcrypt", +- "camellia", "seed", NULL ++ "camellia", "seed", "lzo", NULL + }; + + static void hexdump(unsigned char *buf, unsigned int len) +@@ -800,7 +800,8 @@ + crypto_free_hash(tfm); + } + +-static void test_deflate(void) ++static void test_comp(char *algo, struct comp_testvec *ctemplate, ++ struct comp_testvec *dtemplate, int ctcount, int dtcount) + { + unsigned int i; + char result[COMP_BUF_SIZE]; +@@ -808,25 +809,26 @@ + struct comp_testvec *tv; + unsigned int tsize; + +- printk("\ntesting deflate compression\n"); ++ printk("\ntesting %s compression\n", algo); + +- tsize = sizeof (deflate_comp_tv_template); ++ tsize = sizeof(struct comp_testvec); ++ tsize *= ctcount; + if (tsize > TVMEMSIZE) { + printk("template (%u) too big for tvmem (%u)\n", tsize, + TVMEMSIZE); + return; + } + +- memcpy(tvmem, deflate_comp_tv_template, tsize); ++ memcpy(tvmem, ctemplate, tsize); + tv = (void *)tvmem; + +- tfm = crypto_alloc_comp("deflate", 0, CRYPTO_ALG_ASYNC); ++ tfm = crypto_alloc_comp(algo, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) { +- printk("failed to load transform for deflate\n"); ++ printk("failed to load transform for %s\n", algo); + return; + } + +- for (i = 0; i < DEFLATE_COMP_TEST_VECTORS; i++) { ++ for (i = 0; i < ctcount; i++) { + int ilen, ret, dlen = COMP_BUF_SIZE; + + printk("test %u:\n", i + 1); +@@ -845,19 +847,20 @@ + ilen, dlen); + } + +- 
printk("\ntesting deflate decompression\n"); ++ printk("\ntesting %s decompression\n", algo); + +- tsize = sizeof (deflate_decomp_tv_template); ++ tsize = sizeof(struct comp_testvec); ++ tsize *= dtcount; + if (tsize > TVMEMSIZE) { + printk("template (%u) too big for tvmem (%u)\n", tsize, + TVMEMSIZE); + goto out; + } + +- memcpy(tvmem, deflate_decomp_tv_template, tsize); ++ memcpy(tvmem, dtemplate, tsize); + tv = (void *)tvmem; + +- for (i = 0; i < DEFLATE_DECOMP_TEST_VECTORS; i++) { ++ for (i = 0; i < dtcount; i++) { + int ilen, ret, dlen = COMP_BUF_SIZE; + + printk("test %u:\n", i + 1); +@@ -1057,7 +1060,11 @@ + test_hash("tgr192", tgr192_tv_template, TGR192_TEST_VECTORS); + test_hash("tgr160", tgr160_tv_template, TGR160_TEST_VECTORS); + test_hash("tgr128", tgr128_tv_template, TGR128_TEST_VECTORS); +- test_deflate(); ++ test_comp("deflate", deflate_comp_tv_template, ++ deflate_decomp_tv_template, DEFLATE_COMP_TEST_VECTORS, ++ DEFLATE_DECOMP_TEST_VECTORS); ++ test_comp("lzo", lzo_comp_tv_template, lzo_decomp_tv_template, ++ LZO_COMP_TEST_VECTORS, LZO_DECOMP_TEST_VECTORS); + test_hash("crc32c", crc32c_tv_template, CRC32C_TEST_VECTORS); + test_hash("hmac(md5)", hmac_md5_tv_template, + HMAC_MD5_TEST_VECTORS); +@@ -1167,7 +1174,9 @@ + break; + + case 13: +- test_deflate(); ++ test_comp("deflate", deflate_comp_tv_template, ++ deflate_decomp_tv_template, DEFLATE_COMP_TEST_VECTORS, ++ DEFLATE_DECOMP_TEST_VECTORS); + break; + + case 14: +@@ -1292,6 +1301,11 @@ + CAMELLIA_CBC_DEC_TEST_VECTORS); + break; + ++ case 33: ++ test_comp("lzo", lzo_comp_tv_template, lzo_decomp_tv_template, ++ LZO_COMP_TEST_VECTORS, LZO_DECOMP_TEST_VECTORS); ++ break; ++ + case 100: + test_hash("hmac(md5)", hmac_md5_tv_template, + HMAC_MD5_TEST_VECTORS); +diff -Nurd linux-2.6.24/crypto/tcrypt.h ubifs-v2.6.24/crypto/tcrypt.h +--- linux-2.6.24/crypto/tcrypt.h 2008-01-25 00:58:37.000000000 +0200 ++++ ubifs-v2.6.24/crypto/tcrypt.h 2009-04-07 17:14:47.000000000 +0200 +@@ -4408,6 +4408,88 @@ + }; + + /* 
++ * LZO test vectors (null-terminated strings). ++ */ ++#define LZO_COMP_TEST_VECTORS 2 ++#define LZO_DECOMP_TEST_VECTORS 2 ++ ++static struct comp_testvec lzo_comp_tv_template[] = { ++ { ++ .inlen = 70, ++ .outlen = 46, ++ .input = "Join us now and share the software " ++ "Join us now and share the software ", ++ .output = { 0x00, 0x0d, 0x4a, 0x6f, 0x69, 0x6e, 0x20, 0x75, ++ 0x73, 0x20, 0x6e, 0x6f, 0x77, 0x20, 0x61, 0x6e, ++ 0x64, 0x20, 0x73, 0x68, 0x61, 0x72, 0x65, 0x20, ++ 0x74, 0x68, 0x65, 0x20, 0x73, 0x6f, 0x66, 0x74, ++ 0x77, 0x70, 0x01, 0x01, 0x4a, 0x6f, 0x69, 0x6e, ++ 0x3d, 0x88, 0x00, 0x11, 0x00, 0x00 }, ++ }, { ++ .inlen = 159, ++ .outlen = 133, ++ .input = "This document describes a compression method based on the LZO " ++ "compression algorithm. This document defines the application of " ++ "the LZO algorithm used in UBIFS.", ++ .output = { 0x00, 0x2b, 0x54, 0x68, 0x69, 0x73, 0x20, 0x64, ++ 0x6f, 0x63, 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x20, ++ 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, 0x62, 0x65, ++ 0x73, 0x20, 0x61, 0x20, 0x63, 0x6f, 0x6d, 0x70, ++ 0x72, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x20, ++ 0x6d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x20, 0x62, ++ 0x61, 0x73, 0x65, 0x64, 0x20, 0x6f, 0x6e, 0x20, ++ 0x74, 0x68, 0x65, 0x20, 0x4c, 0x5a, 0x4f, 0x2b, ++ 0x8c, 0x00, 0x0d, 0x61, 0x6c, 0x67, 0x6f, 0x72, ++ 0x69, 0x74, 0x68, 0x6d, 0x2e, 0x20, 0x20, 0x54, ++ 0x68, 0x69, 0x73, 0x2a, 0x54, 0x01, 0x02, 0x66, ++ 0x69, 0x6e, 0x65, 0x73, 0x94, 0x06, 0x05, 0x61, ++ 0x70, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x74, 0x76, ++ 0x0a, 0x6f, 0x66, 0x88, 0x02, 0x60, 0x09, 0x27, ++ 0xf0, 0x00, 0x0c, 0x20, 0x75, 0x73, 0x65, 0x64, ++ 0x20, 0x69, 0x6e, 0x20, 0x55, 0x42, 0x49, 0x46, ++ 0x53, 0x2e, 0x11, 0x00, 0x00 }, ++ }, ++}; ++ ++static struct comp_testvec lzo_decomp_tv_template[] = { ++ { ++ .inlen = 133, ++ .outlen = 159, ++ .input = { 0x00, 0x2b, 0x54, 0x68, 0x69, 0x73, 0x20, 0x64, ++ 0x6f, 0x63, 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x20, ++ 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, 0x62, 0x65, ++ 0x73, 
0x20, 0x61, 0x20, 0x63, 0x6f, 0x6d, 0x70, ++ 0x72, 0x65, 0x73, 0x73, 0x69, 0x6f, 0x6e, 0x20, ++ 0x6d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x20, 0x62, ++ 0x61, 0x73, 0x65, 0x64, 0x20, 0x6f, 0x6e, 0x20, ++ 0x74, 0x68, 0x65, 0x20, 0x4c, 0x5a, 0x4f, 0x2b, ++ 0x8c, 0x00, 0x0d, 0x61, 0x6c, 0x67, 0x6f, 0x72, ++ 0x69, 0x74, 0x68, 0x6d, 0x2e, 0x20, 0x20, 0x54, ++ 0x68, 0x69, 0x73, 0x2a, 0x54, 0x01, 0x02, 0x66, ++ 0x69, 0x6e, 0x65, 0x73, 0x94, 0x06, 0x05, 0x61, ++ 0x70, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x74, 0x76, ++ 0x0a, 0x6f, 0x66, 0x88, 0x02, 0x60, 0x09, 0x27, ++ 0xf0, 0x00, 0x0c, 0x20, 0x75, 0x73, 0x65, 0x64, ++ 0x20, 0x69, 0x6e, 0x20, 0x55, 0x42, 0x49, 0x46, ++ 0x53, 0x2e, 0x11, 0x00, 0x00 }, ++ .output = "This document describes a compression method based on the LZO " ++ "compression algorithm. This document defines the application of " ++ "the LZO algorithm used in UBIFS.", ++ }, { ++ .inlen = 46, ++ .outlen = 70, ++ .input = { 0x00, 0x0d, 0x4a, 0x6f, 0x69, 0x6e, 0x20, 0x75, ++ 0x73, 0x20, 0x6e, 0x6f, 0x77, 0x20, 0x61, 0x6e, ++ 0x64, 0x20, 0x73, 0x68, 0x61, 0x72, 0x65, 0x20, ++ 0x74, 0x68, 0x65, 0x20, 0x73, 0x6f, 0x66, 0x74, ++ 0x77, 0x70, 0x01, 0x01, 0x4a, 0x6f, 0x69, 0x6e, ++ 0x3d, 0x88, 0x00, 0x11, 0x00, 0x00 }, ++ .output = "Join us now and share the software " ++ "Join us now and share the software ", ++ }, ++}; ++ ++/* + * Michael MIC test vectors from IEEE 802.11i + */ + #define MICHAEL_MIC_TEST_VECTORS 6 +diff -Nurd linux-2.6.24/drivers/mtd/nand/nandsim.c ubifs-v2.6.24/drivers/mtd/nand/nandsim.c +--- linux-2.6.24/drivers/mtd/nand/nandsim.c 2008-01-25 00:58:37.000000000 +0200 ++++ ubifs-v2.6.24/drivers/mtd/nand/nandsim.c 2009-04-07 17:14:47.000000000 +0200 +@@ -39,6 +39,7 @@ + #include <linux/delay.h> + #include <linux/list.h> + #include <linux/random.h> ++#include <asm/div64.h> + + /* Default simulator parameters values */ + #if !defined(CONFIG_NANDSIM_FIRST_ID_BYTE) || \ +@@ -298,11 +299,11 @@ + + /* NAND flash "geometry" */ + struct nandsin_geometry { +- uint32_t 
totsz; /* total flash size, bytes */ ++ uint64_t totsz; /* total flash size, bytes */ + uint32_t secsz; /* flash sector (erase block) size, bytes */ + uint pgsz; /* NAND flash page size, bytes */ + uint oobsz; /* page OOB area size, bytes */ +- uint32_t totszoob; /* total flash size including OOB, bytes */ ++ uint64_t totszoob; /* total flash size including OOB, bytes */ + uint pgszoob; /* page size including OOB , bytes*/ + uint secszoob; /* sector size including OOB, bytes */ + uint pgnum; /* total number of pages */ +@@ -459,6 +460,12 @@ + return kstrdup(buf, GFP_KERNEL); + } + ++static u_int64_t divide(u_int64_t n, u_int32_t d) ++{ ++ do_div(n, d); ++ return n; ++} ++ + /* + * Initialize the nandsim structure. + * +@@ -469,8 +476,8 @@ + struct nand_chip *chip = (struct nand_chip *)mtd->priv; + struct nandsim *ns = (struct nandsim *)(chip->priv); + int i, ret = 0; +- u_int32_t remains; +- u_int32_t next_offset; ++ u_int64_t remains; ++ u_int64_t next_offset; + + if (NS_IS_INITIALIZED(ns)) { + NS_ERR("init_nandsim: nandsim is already initialized\n"); +@@ -487,8 +494,8 @@ + ns->geom.oobsz = mtd->oobsize; + ns->geom.secsz = mtd->erasesize; + ns->geom.pgszoob = ns->geom.pgsz + ns->geom.oobsz; +- ns->geom.pgnum = ns->geom.totsz / ns->geom.pgsz; +- ns->geom.totszoob = ns->geom.totsz + ns->geom.pgnum * ns->geom.oobsz; ++ ns->geom.pgnum = divide(ns->geom.totsz, ns->geom.pgsz); ++ ns->geom.totszoob = ns->geom.totsz + (uint64_t)ns->geom.pgnum * ns->geom.oobsz; + ns->geom.secshift = ffs(ns->geom.secsz) - 1; + ns->geom.pgshift = chip->page_shift; + ns->geom.oobshift = ffs(ns->geom.oobsz) - 1; +@@ -511,7 +518,7 @@ + } + + if (ns->options & OPT_SMALLPAGE) { +- if (ns->geom.totsz < (32 << 20)) { ++ if (ns->geom.totsz <= (32 << 20)) { + ns->geom.pgaddrbytes = 3; + ns->geom.secaddrbytes = 2; + } else { +@@ -537,15 +544,16 @@ + remains = ns->geom.totsz; + next_offset = 0; + for (i = 0; i < parts_num; ++i) { +- unsigned long part = parts[i]; +- if (!part || part > remains / 
ns->geom.secsz) { ++ u_int64_t part_sz = (u_int64_t)parts[i] * ns->geom.secsz; ++ ++ if (!part_sz || part_sz > remains) { + NS_ERR("bad partition size.\n"); + ret = -EINVAL; + goto error; + } + ns->partitions[i].name = get_partition_name(i); + ns->partitions[i].offset = next_offset; +- ns->partitions[i].size = part * ns->geom.secsz; ++ ns->partitions[i].size = part_sz; + next_offset += ns->partitions[i].size; + remains -= ns->partitions[i].size; + } +@@ -573,7 +581,8 @@ + if (ns->busw == 16) + NS_WARN("16-bit flashes support wasn't tested\n"); + +- printk("flash size: %u MiB\n", ns->geom.totsz >> 20); ++ printk("flash size: %llu MiB\n", ++ (unsigned long long)ns->geom.totsz >> 20); + printk("page size: %u bytes\n", ns->geom.pgsz); + printk("OOB area size: %u bytes\n", ns->geom.oobsz); + printk("sector size: %u KiB\n", ns->geom.secsz >> 10); +@@ -582,8 +591,9 @@ + printk("bus width: %u\n", ns->busw); + printk("bits in sector size: %u\n", ns->geom.secshift); + printk("bits in page size: %u\n", ns->geom.pgshift); +- printk("bits in OOB size: %u\n", ns->geom.oobshift); +- printk("flash size with OOB: %u KiB\n", ns->geom.totszoob >> 10); ++ printk("bits in OOB size: %u\n", ns->geom.oobshift); ++ printk("flash size with OOB: %llu KiB\n", ++ (unsigned long long)ns->geom.totszoob >> 10); + printk("page address bytes: %u\n", ns->geom.pgaddrbytes); + printk("sector address bytes: %u\n", ns->geom.secaddrbytes); + printk("options: %#x\n", ns->options); +@@ -825,7 +835,7 @@ + + if (!rptwear) + return 0; +- wear_eb_count = mtd->size / mtd->erasesize; ++ wear_eb_count = divide(mtd->size, mtd->erasesize); + mem = wear_eb_count * sizeof(unsigned long); + if (mem / sizeof(unsigned long) != wear_eb_count) { + NS_ERR("Too many erase blocks for wear reporting\n"); +@@ -2013,7 +2023,7 @@ + } + + if (overridesize) { +- u_int32_t new_size = nsmtd->erasesize << overridesize; ++ u_int64_t new_size = (u_int64_t)nsmtd->erasesize << overridesize; + if (new_size >> overridesize != 
nsmtd->erasesize) { + NS_ERR("overridesize is too big\n"); + goto err_exit; +@@ -2021,7 +2031,8 @@ + /* N.B. This relies on nand_scan not doing anything with the size before we change it */ + nsmtd->size = new_size; + chip->chipsize = new_size; +- chip->chip_shift = ffs(new_size) - 1; ++ chip->chip_shift = ffs(nsmtd->erasesize) + overridesize - 1; ++ chip->pagemask = (chip->chipsize >> chip->page_shift) - 1; + } + + if ((retval = setup_wear_reporting(nsmtd)) != 0) +diff -Nurd linux-2.6.24/drivers/mtd/ubi/Kconfig ubifs-v2.6.24/drivers/mtd/ubi/Kconfig +--- linux-2.6.24/drivers/mtd/ubi/Kconfig 2008-01-25 00:58:37.000000000 +0200 ++++ ubifs-v2.6.24/drivers/mtd/ubi/Kconfig 2009-04-07 17:14:47.000000000 +0200 +@@ -24,8 +24,13 @@ + erase counter value and the lowest erase counter value of eraseblocks + of UBI devices. When this threshold is exceeded, UBI starts performing + wear leveling by means of moving data from eraseblock with low erase +- counter to eraseblocks with high erase counter. Leave the default +- value if unsure. ++ counter to eraseblocks with high erase counter. ++ ++ The default value should be OK for SLC NAND flashes, NOR flashes and ++ other flashes which have eraseblock life-cycle 100000 or more. ++ However, in case of MLC NAND flashes which typically have eraseblock ++ life-cycle less than 10000, the threshold should be lessened (e.g., ++ to 128 or 256, although it does not have to be a power of 2). + + config MTD_UBI_BEB_RESERVE + int "Percentage of reserved eraseblocks for bad eraseblocks handling" +diff -Nurd linux-2.6.24/drivers/mtd/ubi/Kconfig.debug ubifs-v2.6.24/drivers/mtd/ubi/Kconfig.debug +--- linux-2.6.24/drivers/mtd/ubi/Kconfig.debug 2008-01-25 00:58:37.000000000 +0200 ++++ ubifs-v2.6.24/drivers/mtd/ubi/Kconfig.debug 2009-04-07 17:14:47.000000000 +0200 +@@ -33,16 +33,6 @@ + This option switches the background thread off by default. The thread + may be also be enabled/disabled via UBI sysfs.
+ +-config MTD_UBI_DEBUG_USERSPACE_IO +- bool "Direct user-space write/erase support" +- default n +- depends on MTD_UBI_DEBUG +- help +- By default, users cannot directly write and erase individual +- eraseblocks of dynamic volumes, and have to use update operation +- instead. This option enables this capability - it is very useful for +- debugging and testing. +- + config MTD_UBI_DEBUG_EMULATE_BITFLIPS + bool "Emulate flash bit-flips" + depends on MTD_UBI_DEBUG +diff -Nurd linux-2.6.24/drivers/mtd/ubi/build.c ubifs-v2.6.24/drivers/mtd/ubi/build.c +--- linux-2.6.24/drivers/mtd/ubi/build.c 2008-01-25 00:58:37.000000000 +0200 ++++ ubifs-v2.6.24/drivers/mtd/ubi/build.c 2009-04-07 17:14:47.000000000 +0200 +@@ -21,11 +21,16 @@ + */ + + /* +- * This file includes UBI initialization and building of UBI devices. At the +- * moment UBI devices may only be added while UBI is initialized, but dynamic +- * device add/remove functionality is planned. Also, at the moment we only +- * attach UBI devices by scanning, which will become a bottleneck when flashes +- * reach certain large size. Then one may improve UBI and add other methods. ++ * This file includes UBI initialization and building of UBI devices. ++ * ++ * When UBI is initialized, it attaches all the MTD devices specified as the ++ * module load parameters or the kernel boot parameters. If MTD devices were ++ * not specified, UBI does not attach any MTD device, but it is possible to do ++ * later using the "UBI control device". ++ * ++ * At the moment we only attach UBI devices by scanning, which will become a ++ * bottleneck when flashes reach certain large size. Then one may improve UBI ++ * and add other methods, although it does not seem to be easy to do.
+ */ + + #include <linux/err.h> +@@ -33,7 +38,9 @@ + #include <linux/moduleparam.h> + #include <linux/stringify.h> + #include <linux/stat.h> ++#include <linux/miscdevice.h> + #include <linux/log2.h> ++#include <linux/kthread.h> + #include "ubi.h" + + /* Maximum length of the 'mtd=' parameter */ +@@ -43,29 +50,39 @@ + * struct mtd_dev_param - MTD device parameter description data structure. + * @name: MTD device name or number string + * @vid_hdr_offs: VID header offset +- * @data_offs: data offset + */ +-struct mtd_dev_param +-{ ++struct mtd_dev_param { + char name[MTD_PARAM_LEN_MAX]; + int vid_hdr_offs; +- int data_offs; + }; + + /* Numbers of elements set in the @mtd_dev_param array */ +-static int mtd_devs = 0; ++static int mtd_devs; + + /* MTD devices specification parameters */ + static struct mtd_dev_param mtd_dev_param[UBI_MAX_DEVICES]; + +-/* Number of UBI devices in system */ +-int ubi_devices_cnt; ++/* Root UBI "class" object (corresponds to '/<sysfs>/class/ubi/') */ ++struct class *ubi_class; ++ ++/* Slab cache for wear-leveling entries */ ++struct kmem_cache *ubi_wl_entry_slab; ++ ++/* UBI control character device */ ++static struct miscdevice ubi_ctrl_cdev = { ++ .minor = MISC_DYNAMIC_MINOR, ++ .name = "ubi_ctrl", ++ .fops = &ubi_ctrl_cdev_operations, ++}; + + /* All UBI devices in system */ +-struct ubi_device *ubi_devices[UBI_MAX_DEVICES]; ++static struct ubi_device *ubi_devices[UBI_MAX_DEVICES]; + +-/* Root UBI "class" object (corresponds to '/<sysfs>/class/ubi/') */ +-struct class *ubi_class; ++/* Serializes UBI devices creations and removals */ ++DEFINE_MUTEX(ubi_devices_mutex); ++ ++/* Protects @ubi_devices and @ubi->ref_count */ ++static DEFINE_SPINLOCK(ubi_devices_lock); + + /* "Show" method for files in '/<sysfs>/class/ubi/' */ + static ssize_t ubi_version_show(struct class *class, char *buf) +@@ -101,42 +118,157 @@ + __ATTR(min_io_size, S_IRUGO, dev_attribute_show, NULL); + static struct device_attribute dev_bgt_enabled = + 
__ATTR(bgt_enabled, S_IRUGO, dev_attribute_show, NULL); ++static struct device_attribute dev_mtd_num = ++ __ATTR(mtd_num, S_IRUGO, dev_attribute_show, NULL); ++ ++/** ++ * ubi_get_device - get UBI device. ++ * @ubi_num: UBI device number ++ * ++ * This function returns UBI device description object for UBI device number ++ * @ubi_num, or %NULL if the device does not exist. This function increases the ++ * device reference count to prevent removal of the device. In other words, the ++ * device cannot be removed if its reference count is not zero. ++ */ ++struct ubi_device *ubi_get_device(int ubi_num) ++{ ++ struct ubi_device *ubi; ++ ++ spin_lock(&ubi_devices_lock); ++ ubi = ubi_devices[ubi_num]; ++ if (ubi) { ++ ubi_assert(ubi->ref_count >= 0); ++ ubi->ref_count += 1; ++ get_device(&ubi->dev); ++ } ++ spin_unlock(&ubi_devices_lock); ++ ++ return ubi; ++} ++ ++/** ++ * ubi_put_device - drop an UBI device reference. ++ * @ubi: UBI device description object ++ */ ++void ubi_put_device(struct ubi_device *ubi) ++{ ++ spin_lock(&ubi_devices_lock); ++ ubi->ref_count -= 1; ++ put_device(&ubi->dev); ++ spin_unlock(&ubi_devices_lock); ++} ++ ++/** ++ * ubi_get_by_major - get UBI device by character device major number. ++ * @major: major number ++ * ++ * This function is similar to 'ubi_get_device()', but it searches the device ++ * by its major number. ++ */ ++struct ubi_device *ubi_get_by_major(int major) ++{ ++ int i; ++ struct ubi_device *ubi; ++ ++ spin_lock(&ubi_devices_lock); ++ for (i = 0; i < UBI_MAX_DEVICES; i++) { ++ ubi = ubi_devices[i]; ++ if (ubi && MAJOR(ubi->cdev.dev) == major) { ++ ubi_assert(ubi->ref_count >= 0); ++ ubi->ref_count += 1; ++ get_device(&ubi->dev); ++ spin_unlock(&ubi_devices_lock); ++ return ubi; ++ } ++ } ++ spin_unlock(&ubi_devices_lock); ++ ++ return NULL; ++} ++ ++/** ++ * ubi_major2num - get UBI device number by character device major number. 
++ * @major: major number ++ * ++ * This function searches UBI device number object by its major number. If UBI ++ * device was not found, this function returns -ENODEV, otherwise the UBI device ++ * number is returned. ++ */ ++int ubi_major2num(int major) ++{ ++ int i, ubi_num = -ENODEV; ++ ++ spin_lock(&ubi_devices_lock); ++ for (i = 0; i < UBI_MAX_DEVICES; i++) { ++ struct ubi_device *ubi = ubi_devices[i]; ++ ++ if (ubi && MAJOR(ubi->cdev.dev) == major) { ++ ubi_num = ubi->ubi_num; ++ break; ++ } ++ } ++ spin_unlock(&ubi_devices_lock); ++ ++ return ubi_num; ++} + + /* "Show" method for files in '/<sysfs>/class/ubi/ubiX/' */ + static ssize_t dev_attribute_show(struct device *dev, + struct device_attribute *attr, char *buf) + { +- const struct ubi_device *ubi; ++ ssize_t ret; ++ struct ubi_device *ubi; + ++ /* ++ * The below code looks weird, but it actually makes sense. We get the ++ * UBI device reference from the contained 'struct ubi_device'. But it ++ * is unclear if the device was removed or not yet. Indeed, if the ++ * device was removed before we increased its reference count, ++ * 'ubi_get_device()' will return -ENODEV and we fail. ++ * ++ * Remember, 'struct ubi_device' is freed in the release function, so ++ * we still can use 'ubi->ubi_num'. 
++ */ + ubi = container_of(dev, struct ubi_device, dev); ++ ubi = ubi_get_device(ubi->ubi_num); ++ if (!ubi) ++ return -ENODEV; ++ + if (attr == &dev_eraseblock_size) +- return sprintf(buf, "%d\n", ubi->leb_size); ++ ret = sprintf(buf, "%d\n", ubi->leb_size); + else if (attr == &dev_avail_eraseblocks) +- return sprintf(buf, "%d\n", ubi->avail_pebs); ++ ret = sprintf(buf, "%d\n", ubi->avail_pebs); + else if (attr == &dev_total_eraseblocks) +- return sprintf(buf, "%d\n", ubi->good_peb_count); ++ ret = sprintf(buf, "%d\n", ubi->good_peb_count); + else if (attr == &dev_volumes_count) +- return sprintf(buf, "%d\n", ubi->vol_count); ++ ret = sprintf(buf, "%d\n", ubi->vol_count - UBI_INT_VOL_COUNT); + else if (attr == &dev_max_ec) +- return sprintf(buf, "%d\n", ubi->max_ec); ++ ret = sprintf(buf, "%d\n", ubi->max_ec); + else if (attr == &dev_reserved_for_bad) +- return sprintf(buf, "%d\n", ubi->beb_rsvd_pebs); ++ ret = sprintf(buf, "%d\n", ubi->beb_rsvd_pebs); + else if (attr == &dev_bad_peb_count) +- return sprintf(buf, "%d\n", ubi->bad_peb_count); ++ ret = sprintf(buf, "%d\n", ubi->bad_peb_count); + else if (attr == &dev_max_vol_count) +- return sprintf(buf, "%d\n", ubi->vtbl_slots); ++ ret = sprintf(buf, "%d\n", ubi->vtbl_slots); + else if (attr == &dev_min_io_size) +- return sprintf(buf, "%d\n", ubi->min_io_size); ++ ret = sprintf(buf, "%d\n", ubi->min_io_size); + else if (attr == &dev_bgt_enabled) +- return sprintf(buf, "%d\n", ubi->thread_enabled); ++ ret = sprintf(buf, "%d\n", ubi->thread_enabled); ++ else if (attr == &dev_mtd_num) ++ ret = sprintf(buf, "%d\n", ubi->mtd->index); + else +- BUG(); ++ ret = -EINVAL; + +- return 0; ++ ubi_put_device(ubi); ++ return ret; + } + +-/* Fake "release" method for UBI devices */ +-static void dev_release(struct device *dev) { } ++static void dev_release(struct device *dev) ++{ ++ struct ubi_device *ubi = container_of(dev, struct ubi_device, dev); ++ ++ kfree(ubi); ++} + + /** + * ubi_sysfs_init - initialize sysfs for an UBI 
device. +@@ -150,68 +282,44 @@ + int err; + + ubi->dev.release = dev_release; +- ubi->dev.devt = MKDEV(ubi->major, 0); ++ ubi->dev.devt = ubi->cdev.dev; + ubi->dev.class = ubi_class; + sprintf(&ubi->dev.bus_id[0], UBI_NAME_STR"%d", ubi->ubi_num); + err = device_register(&ubi->dev); + if (err) +- goto out; ++ return err; + + err = device_create_file(&ubi->dev, &dev_eraseblock_size); + if (err) +- goto out_unregister; ++ return err; + err = device_create_file(&ubi->dev, &dev_avail_eraseblocks); + if (err) +- goto out_eraseblock_size; ++ return err; + err = device_create_file(&ubi->dev, &dev_total_eraseblocks); + if (err) +- goto out_avail_eraseblocks; ++ return err; + err = device_create_file(&ubi->dev, &dev_volumes_count); + if (err) +- goto out_total_eraseblocks; ++ return err; + err = device_create_file(&ubi->dev, &dev_max_ec); + if (err) +- goto out_volumes_count; ++ return err; + err = device_create_file(&ubi->dev, &dev_reserved_for_bad); + if (err) +- goto out_volumes_max_ec; ++ return err; + err = device_create_file(&ubi->dev, &dev_bad_peb_count); + if (err) +- goto out_reserved_for_bad; ++ return err; + err = device_create_file(&ubi->dev, &dev_max_vol_count); + if (err) +- goto out_bad_peb_count; ++ return err; + err = device_create_file(&ubi->dev, &dev_min_io_size); + if (err) +- goto out_max_vol_count; ++ return err; + err = device_create_file(&ubi->dev, &dev_bgt_enabled); + if (err) +- goto out_min_io_size; +- +- return 0; +- +-out_min_io_size: +- device_remove_file(&ubi->dev, &dev_min_io_size); +-out_max_vol_count: +- device_remove_file(&ubi->dev, &dev_max_vol_count); +-out_bad_peb_count: +- device_remove_file(&ubi->dev, &dev_bad_peb_count); +-out_reserved_for_bad: +- device_remove_file(&ubi->dev, &dev_reserved_for_bad); +-out_volumes_max_ec: +- device_remove_file(&ubi->dev, &dev_max_ec); +-out_volumes_count: +- device_remove_file(&ubi->dev, &dev_volumes_count); +-out_total_eraseblocks: +- device_remove_file(&ubi->dev, &dev_total_eraseblocks); 
+-out_avail_eraseblocks: +- device_remove_file(&ubi->dev, &dev_avail_eraseblocks); +-out_eraseblock_size: +- device_remove_file(&ubi->dev, &dev_eraseblock_size); +-out_unregister: +- device_unregister(&ubi->dev); +-out: +- ubi_err("failed to initialize sysfs for %s", ubi->ubi_name); ++ return err; ++ err = device_create_file(&ubi->dev, &dev_mtd_num); + return err; + } + +@@ -221,6 +329,7 @@ + */ + static void ubi_sysfs_close(struct ubi_device *ubi) + { ++ device_remove_file(&ubi->dev, &dev_mtd_num); + device_remove_file(&ubi->dev, &dev_bgt_enabled); + device_remove_file(&ubi->dev, &dev_min_io_size); + device_remove_file(&ubi->dev, &dev_max_vol_count); +@@ -244,7 +353,26 @@ + + for (i = 0; i < ubi->vtbl_slots; i++) + if (ubi->volumes[i]) +- ubi_free_volume(ubi, i); ++ ubi_free_volume(ubi, ubi->volumes[i]); ++} ++ ++/** ++ * free_user_volumes - free all user volumes. ++ * @ubi: UBI device description object ++ * ++ * Normally the volumes are freed at the release function of the volume device ++ * objects. However, on error paths the volumes have to be freed before the ++ * device objects have been initialized. ++ */ ++static void free_user_volumes(struct ubi_device *ubi) ++{ ++ int i; ++ ++ for (i = 0; i < ubi->vtbl_slots; i++) ++ if (ubi->volumes[i]) { ++ kfree(ubi->volumes[i]->eba_tbl); ++ kfree(ubi->volumes[i]); ++ } + } + + /** +@@ -252,16 +380,13 @@ + * @ubi: UBI device description object + * + * This function returns zero in case of success and a negative error code in +- * case of failure. ++ * case of failure. Note, this function destroys all volumes if it fails. 
+ */ + static int uif_init(struct ubi_device *ubi) + { + int i, err; + dev_t dev; + +- mutex_init(&ubi->vtbl_mutex); +- spin_lock_init(&ubi->volumes_lock); +- + sprintf(ubi->ubi_name, UBI_NAME_STR "%d", ubi->ubi_num); + + /* +@@ -278,52 +403,72 @@ + return err; + } + ++ ubi_assert(MINOR(dev) == 0); + cdev_init(&ubi->cdev, &ubi_cdev_operations); +- ubi->major = MAJOR(dev); +- dbg_msg("%s major is %u", ubi->ubi_name, ubi->major); ++ dbg_gen("%s major is %u", ubi->ubi_name, MAJOR(dev)); + ubi->cdev.owner = THIS_MODULE; + +- dev = MKDEV(ubi->major, 0); + err = cdev_add(&ubi->cdev, dev, 1); + if (err) { +- ubi_err("cannot add character device %s", ubi->ubi_name); ++ ubi_err("cannot add character device"); + goto out_unreg; + } + + err = ubi_sysfs_init(ubi); + if (err) +- goto out_cdev; ++ goto out_sysfs; + + for (i = 0; i < ubi->vtbl_slots; i++) + if (ubi->volumes[i]) { +- err = ubi_add_volume(ubi, i); +- if (err) ++ err = ubi_add_volume(ubi, ubi->volumes[i]); ++ if (err) { ++ ubi_err("cannot add volume %d", i); + goto out_volumes; ++ } + } + + return 0; + + out_volumes: + kill_volumes(ubi); ++out_sysfs: + ubi_sysfs_close(ubi); +-out_cdev: + cdev_del(&ubi->cdev); + out_unreg: +- unregister_chrdev_region(MKDEV(ubi->major, 0), +- ubi->vtbl_slots + 1); ++ unregister_chrdev_region(ubi->cdev.dev, ubi->vtbl_slots + 1); ++ ubi_err("cannot initialize UBI %s, error %d", ubi->ubi_name, err); + return err; + } + + /** + * uif_close - close user interfaces for an UBI device. + * @ubi: UBI device description object ++ * ++ * Note, since this function un-registers UBI volume device objects (@vol->dev), ++ * the memory allocated for the volumes is freed as well (in the release ++ * function). 
+ */ + static void uif_close(struct ubi_device *ubi) + { + kill_volumes(ubi); + ubi_sysfs_close(ubi); + cdev_del(&ubi->cdev); +- unregister_chrdev_region(MKDEV(ubi->major, 0), ubi->vtbl_slots + 1); ++ unregister_chrdev_region(ubi->cdev.dev, ubi->vtbl_slots + 1); ++} ++ ++/** ++ * free_internal_volumes - free internal volumes. ++ * @ubi: UBI device description object ++ */ ++static void free_internal_volumes(struct ubi_device *ubi) ++{ ++ int i; ++ ++ for (i = ubi->vtbl_slots; ++ i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) { ++ kfree(ubi->volumes[i]->eba_tbl); ++ kfree(ubi->volumes[i]); ++ } + } + + /** +@@ -370,6 +515,7 @@ + out_wl: + ubi_wl_close(ubi); + out_vtbl: ++ free_internal_volumes(ubi); + vfree(ubi->vtbl); + out_si: + ubi_scan_destroy_si(si); +@@ -377,16 +523,16 @@ + } + + /** +- * io_init - initialize I/O unit for a given UBI device. ++ * io_init - initialize I/O sub-system for a given UBI device. + * @ubi: UBI device description object + * + * If @ubi->vid_hdr_offset or @ubi->leb_start is zero, default offsets are + * assumed: + * o EC header is always at offset zero - this cannot be changed; + * o VID header starts just after the EC header at the closest address +- * aligned to @io->@hdrs_min_io_size; ++ * aligned to @io->hdrs_min_io_size; + * o data starts just after the VID header at the closest address aligned to +- * @io->@min_io_size ++ * @io->min_io_size + * + * This function returns zero in case of success and a negative error code in + * case of failure. +@@ -407,6 +553,9 @@ + return -EINVAL; + } + ++ if (ubi->vid_hdr_offset < 0) ++ return -EINVAL; ++ + /* + * Note, in this implementation we support MTD devices with 0x7FFFFFFF + * physical eraseblocks maximum. +@@ -422,9 +571,14 @@ + ubi->min_io_size = ubi->mtd->writesize; + ubi->hdrs_min_io_size = ubi->mtd->writesize >> ubi->mtd->subpage_sft; + +- /* Make sure minimal I/O unit is power of 2 */ ++ /* ++ * Make sure minimal I/O unit is power of 2. 
Note, there is no ++ * fundamental reason for this assumption. It is just an optimization ++ * which allows us to avoid costly division operations. ++ */ + if (!is_power_of_2(ubi->min_io_size)) { +- ubi_err("bad min. I/O unit"); ++ ubi_err("min. I/O unit (%d) is not power of 2", ++ ubi->min_io_size); + return -EINVAL; + } + +@@ -453,10 +607,8 @@ + } + + /* Similar for the data offset */ +- if (ubi->leb_start == 0) { +- ubi->leb_start = ubi->vid_hdr_offset + ubi->vid_hdr_alsize; +- ubi->leb_start = ALIGN(ubi->leb_start, ubi->min_io_size); +- } ++ ubi->leb_start = ubi->vid_hdr_offset + UBI_EC_HDR_SIZE; ++ ubi->leb_start = ALIGN(ubi->leb_start, ubi->min_io_size); + + dbg_msg("vid_hdr_offset %d", ubi->vid_hdr_offset); + dbg_msg("vid_hdr_aloffset %d", ubi->vid_hdr_aloffset); +@@ -474,7 +626,7 @@ + if (ubi->vid_hdr_offset < UBI_EC_HDR_SIZE || + ubi->leb_start < ubi->vid_hdr_offset + UBI_VID_HDR_SIZE || + ubi->leb_start > ubi->peb_size - UBI_VID_HDR_SIZE || +- ubi->leb_start % ubi->min_io_size) { ++ ubi->leb_start & (ubi->min_io_size - 1)) { + ubi_err("bad VID header (%d) or data offsets (%d)", + ubi->vid_hdr_offset, ubi->leb_start); + return -EINVAL; +@@ -499,8 +651,16 @@ + ubi->ro_mode = 1; + } + +- dbg_msg("leb_size %d", ubi->leb_size); +- dbg_msg("ro_mode %d", ubi->ro_mode); ++ ubi_msg("physical eraseblock size: %d bytes (%d KiB)", ++ ubi->peb_size, ubi->peb_size >> 10); ++ ubi_msg("logical eraseblock size: %d bytes", ubi->leb_size); ++ ubi_msg("smallest flash I/O unit: %d", ubi->min_io_size); ++ if (ubi->hdrs_min_io_size != ubi->min_io_size) ++ ubi_msg("sub-page size: %d", ++ ubi->hdrs_min_io_size); ++ ubi_msg("VID header offset: %d (aligned %d)", ++ ubi->vid_hdr_offset, ubi->vid_hdr_aloffset); ++ ubi_msg("data offset: %d", ubi->leb_start); + + /* + * Note, ideally, we have to initialize ubi->bad_peb_count here. But +@@ -514,89 +674,162 @@ + } + + /** +- * attach_mtd_dev - attach an MTD device. 
+- * @mtd_dev: MTD device name or number string +- * @vid_hdr_offset: VID header offset +- * @data_offset: data offset ++ * autoresize - re-size the volume which has the "auto-resize" flag set. ++ * @ubi: UBI device description object ++ * @vol_id: ID of the volume to re-size + * +- * This function attaches an MTD device to UBI. It first treats @mtd_dev as the +- * MTD device name, and tries to open it by this name. If it is unable to open, +- * it tries to convert @mtd_dev to an integer and open the MTD device by its +- * number. Returns zero in case of success and a negative error code in case of +- * failure. ++ * This function re-sizes the volume marked by the @UBI_VTBL_AUTORESIZE_FLG in ++ * the volume table to the largest possible size. See comments in ubi-header.h ++ * for more description of the flag. Returns zero in case of success and a ++ * negative error code in case of failure. + */ +-static int attach_mtd_dev(const char *mtd_dev, int vid_hdr_offset, +- int data_offset) ++static int autoresize(struct ubi_device *ubi, int vol_id) + { +- struct ubi_device *ubi; +- struct mtd_info *mtd; +- int i, err; ++ struct ubi_volume_desc desc; ++ struct ubi_volume *vol = ubi->volumes[vol_id]; ++ int err, old_reserved_pebs = vol->reserved_pebs; + +- mtd = get_mtd_device_nm(mtd_dev); +- if (IS_ERR(mtd)) { +- int mtd_num; +- char *endp; ++ /* ++ * Clear the auto-resize flag in the volume in-memory copy of the ++ * volume table, and 'ubi_resize_volume()' will propagate this change ++ * to the flash. ++ */ ++ ubi->vtbl[vol_id].flags &= ~UBI_VTBL_AUTORESIZE_FLG; + +- if (PTR_ERR(mtd) != -ENODEV) +- return PTR_ERR(mtd); ++ if (ubi->avail_pebs == 0) { ++ struct ubi_vtbl_record vtbl_rec; + + /* +- * Probably this is not MTD device name but MTD device number - +- * check this out. ++ * No available PEBs to re-size the volume, clear the flag on ++ * flash and exit. 
+ */ +- mtd_num = simple_strtoul(mtd_dev, &endp, 0); +- if (*endp != '\0' || mtd_dev == endp) { +- ubi_err("incorrect MTD device: \"%s\"", mtd_dev); +- return -ENODEV; ++ memcpy(&vtbl_rec, &ubi->vtbl[vol_id], ++ sizeof(struct ubi_vtbl_record)); ++ err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec); ++ if (err) ++ ubi_err("cannot clean auto-resize flag for volume %d", ++ vol_id); ++ } else { ++ desc.vol = vol; ++ err = ubi_resize_volume(&desc, ++ old_reserved_pebs + ubi->avail_pebs); ++ if (err) ++ ubi_err("cannot auto-resize volume %d", vol_id); ++ } ++ ++ if (err) ++ return err; ++ ++ ubi_msg("volume %d (\"%s\") re-sized from %d to %d LEBs", vol_id, ++ vol->name, old_reserved_pebs, vol->reserved_pebs); ++ return 0; ++} ++ ++/** ++ * ubi_attach_mtd_dev - attach an MTD device. ++ * @mtd: MTD device description object ++ * @ubi_num: number to assign to the new UBI device ++ * @vid_hdr_offset: VID header offset ++ * ++ * This function attaches MTD device @mtd_dev to UBI and assign @ubi_num number ++ * to the newly created UBI device, unless @ubi_num is %UBI_DEV_NUM_AUTO, in ++ * which case this function finds a vacant device number and assigns it ++ * automatically. Returns the new UBI device number in case of success and a ++ * negative error code in case of failure. ++ * ++ * Note, the invocations of this function has to be serialized by the ++ * @ubi_devices_mutex. ++ */ ++int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset) ++{ ++ struct ubi_device *ubi; ++ int i, err, do_free = 1; ++ ++ /* ++ * Check if we already have the same MTD device attached. ++ * ++ * Note, this function assumes that UBI devices creations and deletions ++ * are serialized, so it does not take the &ubi_devices_lock. 
++ */ ++ for (i = 0; i < UBI_MAX_DEVICES; i++) { ++ ubi = ubi_devices[i]; ++ if (ubi && mtd->index == ubi->mtd->index) { ++ dbg_err("mtd%d is already attached to ubi%d", ++ mtd->index, i); ++ return -EEXIST; + } ++ } + +- mtd = get_mtd_device(NULL, mtd_num); +- if (IS_ERR(mtd)) +- return PTR_ERR(mtd); ++ /* ++ * Make sure this MTD device is not emulated on top of an UBI volume ++ * already. Well, generally this recursion works fine, but there are ++ * different problems like the UBI module takes a reference to itself ++ * by attaching (and thus, opening) the emulated MTD device. This ++ * results in inability to unload the module. And in general it makes ++ * no sense to attach emulated MTD devices, so we prohibit this. ++ */ ++ if (mtd->type == MTD_UBIVOLUME) { ++ ubi_err("refuse attaching mtd%d - it is already emulated on " ++ "top of UBI", mtd->index); ++ return -EINVAL; + } + +- /* Check if we already have the same MTD device attached */ +- for (i = 0; i < ubi_devices_cnt; i++) +- if (ubi_devices[i]->mtd->index == mtd->index) { +- ubi_err("mtd%d is already attached to ubi%d", +- mtd->index, i); +- err = -EINVAL; +- goto out_mtd; ++ if (ubi_num == UBI_DEV_NUM_AUTO) { ++ /* Search for an empty slot in the @ubi_devices array */ ++ for (ubi_num = 0; ubi_num < UBI_MAX_DEVICES; ubi_num++) ++ if (!ubi_devices[ubi_num]) ++ break; ++ if (ubi_num == UBI_MAX_DEVICES) { ++ dbg_err("only %d UBI devices may be created", ++ UBI_MAX_DEVICES); ++ return -ENFILE; + } ++ } else { ++ if (ubi_num >= UBI_MAX_DEVICES) ++ return -EINVAL; + +- ubi = ubi_devices[ubi_devices_cnt] = kzalloc(sizeof(struct ubi_device), +- GFP_KERNEL); +- if (!ubi) { +- err = -ENOMEM; +- goto out_mtd; ++ /* Make sure ubi_num is not busy */ ++ if (ubi_devices[ubi_num]) { ++ dbg_err("ubi%d already exists", ubi_num); ++ return -EEXIST; ++ } + } + +- ubi->ubi_num = ubi_devices_cnt; ++ ubi = kzalloc(sizeof(struct ubi_device), GFP_KERNEL); ++ if (!ubi) ++ return -ENOMEM; ++ + ubi->mtd = mtd; ++ ubi->ubi_num = 
ubi_num; ++ ubi->vid_hdr_offset = vid_hdr_offset; ++ ubi->autoresize_vol_id = -1; + +- dbg_msg("attaching mtd%d to ubi%d: VID header offset %d data offset %d", +- ubi->mtd->index, ubi_devices_cnt, vid_hdr_offset, data_offset); ++ mutex_init(&ubi->buf_mutex); ++ mutex_init(&ubi->ckvol_mutex); ++ mutex_init(&ubi->mult_mutex); ++ mutex_init(&ubi->volumes_mutex); ++ spin_lock_init(&ubi->volumes_lock); ++ ++ ubi_msg("attaching mtd%d to ubi%d", mtd->index, ubi_num); + +- ubi->vid_hdr_offset = vid_hdr_offset; +- ubi->leb_start = data_offset; + err = io_init(ubi); + if (err) + goto out_free; + +- mutex_init(&ubi->buf_mutex); ++ err = -ENOMEM; + ubi->peb_buf1 = vmalloc(ubi->peb_size); + if (!ubi->peb_buf1) + goto out_free; + + ubi->peb_buf2 = vmalloc(ubi->peb_size); + if (!ubi->peb_buf2) +- goto out_free; ++ goto out_free; + + #ifdef CONFIG_MTD_UBI_DEBUG + mutex_init(&ubi->dbg_buf_mutex); + ubi->dbg_peb_buf = vmalloc(ubi->peb_size); + if (!ubi->dbg_peb_buf) +- goto out_free; ++ goto out_free; + #endif + + err = attach_by_scanning(ubi); +@@ -605,22 +838,29 @@ + goto out_free; + } + ++ if (ubi->autoresize_vol_id != -1) { ++ err = autoresize(ubi, ubi->autoresize_vol_id); ++ if (err) ++ goto out_detach; ++ } ++ + err = uif_init(ubi); + if (err) +- goto out_detach; ++ goto out_nofree; + +- ubi_msg("attached mtd%d to ubi%d", ubi->mtd->index, ubi_devices_cnt); +- ubi_msg("MTD device name: \"%s\"", ubi->mtd->name); ++ ubi->bgt_thread = kthread_create(ubi_thread, ubi, ubi->bgt_name); ++ if (IS_ERR(ubi->bgt_thread)) { ++ err = PTR_ERR(ubi->bgt_thread); ++ ubi_err("cannot spawn \"%s\", error %d", ubi->bgt_name, ++ err); ++ goto out_uif; ++ } ++ ++ ubi_msg("attached mtd%d to ubi%d", mtd->index, ubi_num); ++ ubi_msg("MTD device name: \"%s\"", mtd->name); + ubi_msg("MTD device size: %llu MiB", ubi->flash_size >> 20); +- ubi_msg("physical eraseblock size: %d bytes (%d KiB)", +- ubi->peb_size, ubi->peb_size >> 10); +- ubi_msg("logical eraseblock size: %d bytes", ubi->leb_size); + 
ubi_msg("number of good PEBs: %d", ubi->good_peb_count); + ubi_msg("number of bad PEBs: %d", ubi->bad_peb_count); +- ubi_msg("smallest flash I/O unit: %d", ubi->min_io_size); +- ubi_msg("VID header offset: %d (aligned %d)", +- ubi->vid_hdr_offset, ubi->vid_hdr_aloffset); +- ubi_msg("data offset: %d", ubi->leb_start); + ubi_msg("max. allowed volumes: %d", ubi->vtbl_slots); + ubi_msg("wear-leveling threshold: %d", CONFIG_MTD_UBI_WL_THRESHOLD); + ubi_msg("number of internal volumes: %d", UBI_INT_VOL_COUNT); +@@ -632,18 +872,22 @@ + ubi->beb_rsvd_pebs); + ubi_msg("max/mean erase counter: %d/%d", ubi->max_ec, ubi->mean_ec); + +- /* Enable the background thread */ +- if (!DBG_DISABLE_BGT) { ++ if (!DBG_DISABLE_BGT) + ubi->thread_enabled = 1; +- wake_up_process(ubi->bgt_thread); +- } ++ wake_up_process(ubi->bgt_thread); + +- ubi_devices_cnt += 1; +- return 0; ++ ubi_devices[ubi_num] = ubi; ++ return ubi_num; + ++out_uif: ++ uif_close(ubi); ++out_nofree: ++ do_free = 0; + out_detach: +- ubi_eba_close(ubi); + ubi_wl_close(ubi); ++ if (do_free) ++ free_user_volumes(ubi); ++ free_internal_volumes(ubi); + vfree(ubi->vtbl); + out_free: + vfree(ubi->peb_buf1); +@@ -652,24 +896,67 @@ + vfree(ubi->dbg_peb_buf); + #endif + kfree(ubi); +-out_mtd: +- put_mtd_device(mtd); +- ubi_devices[ubi_devices_cnt] = NULL; + return err; + } + + /** +- * detach_mtd_dev - detach an MTD device. +- * @ubi: UBI device description object ++ * ubi_detach_mtd_dev - detach an MTD device. ++ * @ubi_num: UBI device number to detach from ++ * @anyway: detach MTD even if device reference count is not zero ++ * ++ * This function destroys an UBI device number @ubi_num and detaches the ++ * underlying MTD device. Returns zero in case of success and %-EBUSY if the ++ * UBI device is busy and cannot be destroyed, and %-EINVAL if it does not ++ * exist. ++ * ++ * Note, the invocations of this function has to be serialized by the ++ * @ubi_devices_mutex. 
+ */ +-static void detach_mtd_dev(struct ubi_device *ubi) ++int ubi_detach_mtd_dev(int ubi_num, int anyway) + { +- int ubi_num = ubi->ubi_num, mtd_num = ubi->mtd->index; ++ struct ubi_device *ubi; + ++ if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES) ++ return -EINVAL; ++ ++ spin_lock(&ubi_devices_lock); ++ ubi = ubi_devices[ubi_num]; ++ if (!ubi) { ++ spin_unlock(&ubi_devices_lock); ++ return -EINVAL; ++ } ++ ++ if (ubi->ref_count) { ++ if (!anyway) { ++ spin_unlock(&ubi_devices_lock); ++ return -EBUSY; ++ } ++ /* This may only happen if there is a bug */ ++ ubi_err("%s reference count %d, destroy anyway", ++ ubi->ubi_name, ubi->ref_count); ++ } ++ ubi_devices[ubi_num] = NULL; ++ spin_unlock(&ubi_devices_lock); ++ ++ ubi_assert(ubi_num == ubi->ubi_num); + dbg_msg("detaching mtd%d from ubi%d", ubi->mtd->index, ubi_num); ++ ++ /* ++ * Before freeing anything, we have to stop the background thread to ++ * prevent it from doing anything on this device while we are freeing. ++ */ ++ if (ubi->bgt_thread) ++ kthread_stop(ubi->bgt_thread); ++ ++ /* ++ * Get a reference to the device in order to prevent 'dev_release()' ++ * from freeing @ubi object. ++ */ ++ get_device(&ubi->dev); ++ + uif_close(ubi); +- ubi_eba_close(ubi); + ubi_wl_close(ubi); ++ free_internal_volumes(ubi); + vfree(ubi->vtbl); + put_mtd_device(ubi->mtd); + vfree(ubi->peb_buf1); +@@ -677,11 +964,37 @@ + #ifdef CONFIG_MTD_UBI_DEBUG + vfree(ubi->dbg_peb_buf); + #endif +- kfree(ubi_devices[ubi_num]); +- ubi_devices[ubi_num] = NULL; +- ubi_devices_cnt -= 1; +- ubi_assert(ubi_devices_cnt >= 0); +- ubi_msg("mtd%d is detached from ubi%d", mtd_num, ubi_num); ++ ubi_msg("mtd%d is detached from ubi%d", ubi->mtd->index, ubi->ubi_num); ++ put_device(&ubi->dev); ++ return 0; ++} ++ ++/** ++ * find_mtd_device - open an MTD device by its name or number. 
++ * @mtd_dev: name or number of the device ++ * ++ * This function tries to open an MTD device described by @mtd_dev string, ++ * which is first treated as an ASCII number, and if it is not true, it is ++ * treated as MTD device name. Returns MTD device description object in case of ++ * success and a negative error code in case of failure. ++ */ ++static struct mtd_info * __init open_mtd_device(const char *mtd_dev) ++{ ++ struct mtd_info *mtd; ++ int mtd_num; ++ char *endp; ++ ++ mtd_num = simple_strtoul(mtd_dev, &endp, 0); ++ if (*endp != '\0' || mtd_dev == endp) { ++ /* ++ * This does not look like an ASCII integer, probably this is ++ * MTD device name. ++ */ ++ mtd = get_mtd_device_nm(mtd_dev); ++ } else ++ mtd = get_mtd_device(NULL, mtd_num); ++ ++ return mtd; + } + + static int __init ubi_init(void) +@@ -693,55 +1006,101 @@ + BUILD_BUG_ON(sizeof(struct ubi_vid_hdr) != 64); + + if (mtd_devs > UBI_MAX_DEVICES) { +- printk("UBI error: too many MTD devices, maximum is %d\n", +- UBI_MAX_DEVICES); ++ ubi_err("too many MTD devices, maximum is %d", UBI_MAX_DEVICES); + return -EINVAL; + } + ++ /* Create base sysfs directory and sysfs files */ + ubi_class = class_create(THIS_MODULE, UBI_NAME_STR); +- if (IS_ERR(ubi_class)) +- return PTR_ERR(ubi_class); ++ if (IS_ERR(ubi_class)) { ++ err = PTR_ERR(ubi_class); ++ ubi_err("cannot create UBI class"); ++ goto out; ++ } + + err = class_create_file(ubi_class, &ubi_version); +- if (err) ++ if (err) { ++ ubi_err("cannot create sysfs file"); + goto out_class; ++ } ++ ++ err = misc_register(&ubi_ctrl_cdev); ++ if (err) { ++ ubi_err("cannot register device"); ++ goto out_version; ++ } ++ ++ ubi_wl_entry_slab = kmem_cache_create("ubi_wl_entry_slab", ++ sizeof(struct ubi_wl_entry), ++ 0, 0, NULL); ++ if (!ubi_wl_entry_slab) ++ goto out_dev_unreg; + + /* Attach MTD devices */ + for (i = 0; i < mtd_devs; i++) { + struct mtd_dev_param *p = &mtd_dev_param[i]; ++ struct mtd_info *mtd; + + cond_resched(); +- err = 
attach_mtd_dev(p->name, p->vid_hdr_offs, p->data_offs); +- if (err) ++ ++ mtd = open_mtd_device(p->name); ++ if (IS_ERR(mtd)) { ++ err = PTR_ERR(mtd); + goto out_detach; ++ } ++ ++ mutex_lock(&ubi_devices_mutex); ++ err = ubi_attach_mtd_dev(mtd, UBI_DEV_NUM_AUTO, ++ p->vid_hdr_offs); ++ mutex_unlock(&ubi_devices_mutex); ++ if (err < 0) { ++ put_mtd_device(mtd); ++ ubi_err("cannot attach mtd%d", mtd->index); ++ goto out_detach; ++ } + } + + return 0; + + out_detach: + for (k = 0; k < i; k++) +- detach_mtd_dev(ubi_devices[k]); ++ if (ubi_devices[k]) { ++ mutex_lock(&ubi_devices_mutex); ++ ubi_detach_mtd_dev(ubi_devices[k]->ubi_num, 1); ++ mutex_unlock(&ubi_devices_mutex); ++ } ++ kmem_cache_destroy(ubi_wl_entry_slab); ++out_dev_unreg: ++ misc_deregister(&ubi_ctrl_cdev); ++out_version: + class_remove_file(ubi_class, &ubi_version); + out_class: + class_destroy(ubi_class); ++out: ++ ubi_err("UBI error: cannot initialize UBI, error %d", err); + return err; + } + module_init(ubi_init); + + static void __exit ubi_exit(void) + { +- int i, n = ubi_devices_cnt; ++ int i; + +- for (i = 0; i < n; i++) +- detach_mtd_dev(ubi_devices[i]); ++ for (i = 0; i < UBI_MAX_DEVICES; i++) ++ if (ubi_devices[i]) { ++ mutex_lock(&ubi_devices_mutex); ++ ubi_detach_mtd_dev(ubi_devices[i]->ubi_num, 1); ++ mutex_unlock(&ubi_devices_mutex); ++ } ++ kmem_cache_destroy(ubi_wl_entry_slab); ++ misc_deregister(&ubi_ctrl_cdev); + class_remove_file(ubi_class, &ubi_version); + class_destroy(ubi_class); + } + module_exit(ubi_exit); + + /** +- * bytes_str_to_int - convert a string representing number of bytes to an +- * integer. ++ * bytes_str_to_int - convert a number of bytes string into an integer. 
+ * @str: the string to convert + * + * This function returns positive resulting integer in case of success and a +@@ -754,7 +1113,8 @@ + + result = simple_strtoul(str, &endp, 0); + if (str == endp || result < 0) { +- printk("UBI error: incorrect bytes count: \"%s\"\n", str); ++ printk(KERN_ERR "UBI error: incorrect bytes count: \"%s\"\n", ++ str); + return -EINVAL; + } + +@@ -764,15 +1124,14 @@ + case 'M': + result *= 1024; + case 'K': +- case 'k': + result *= 1024; +- if (endp[1] == 'i' && (endp[2] == '\0' || +- endp[2] == 'B' || endp[2] == 'b')) ++ if (endp[1] == 'i' && endp[2] == 'B') + endp += 2; + case '\0': + break; + default: +- printk("UBI error: incorrect bytes count: \"%s\"\n", str); ++ printk(KERN_ERR "UBI error: incorrect bytes count: \"%s\"\n", ++ str); + return -EINVAL; + } + +@@ -793,23 +1152,27 @@ + struct mtd_dev_param *p; + char buf[MTD_PARAM_LEN_MAX]; + char *pbuf = &buf[0]; +- char *tokens[3] = {NULL, NULL, NULL}; ++ char *tokens[2] = {NULL, NULL}; ++ ++ if (!val) ++ return -EINVAL; + + if (mtd_devs == UBI_MAX_DEVICES) { +- printk("UBI error: too many parameters, max. is %d\n", ++ printk(KERN_ERR "UBI error: too many parameters, max. is %d\n", + UBI_MAX_DEVICES); + return -EINVAL; + } + + len = strnlen(val, MTD_PARAM_LEN_MAX); + if (len == MTD_PARAM_LEN_MAX) { +- printk("UBI error: parameter \"%s\" is too long, max. is %d\n", +- val, MTD_PARAM_LEN_MAX); ++ printk(KERN_ERR "UBI error: parameter \"%s\" is too long, " ++ "max. 
is %d\n", val, MTD_PARAM_LEN_MAX); + return -EINVAL; + } + + if (len == 0) { +- printk("UBI warning: empty 'mtd=' parameter - ignored\n"); ++ printk(KERN_WARNING "UBI warning: empty 'mtd=' parameter - " ++ "ignored\n"); + return 0; + } + +@@ -819,11 +1182,12 @@ + if (buf[len - 1] == '\n') + buf[len - 1] = '\0'; + +- for (i = 0; i < 3; i++) ++ for (i = 0; i < 2; i++) + tokens[i] = strsep(&pbuf, ","); + + if (pbuf) { +- printk("UBI error: too many arguments at \"%s\"\n", val); ++ printk(KERN_ERR "UBI error: too many arguments at \"%s\"\n", ++ val); + return -EINVAL; + } + +@@ -832,13 +1196,9 @@ + + if (tokens[1]) + p->vid_hdr_offs = bytes_str_to_int(tokens[1]); +- if (tokens[2]) +- p->data_offs = bytes_str_to_int(tokens[2]); + + if (p->vid_hdr_offs < 0) + return p->vid_hdr_offs; +- if (p->data_offs < 0) +- return p->data_offs; + + mtd_devs += 1; + return 0; +@@ -846,16 +1206,15 @@ + + module_param_call(mtd, ubi_mtd_param_parse, NULL, NULL, 000); + MODULE_PARM_DESC(mtd, "MTD devices to attach. Parameter format: " +- "mtd=<name|num>[,<vid_hdr_offs>,<data_offs>]. " ++ "mtd=<name|num>[,<vid_hdr_offs>].\n" + "Multiple \"mtd\" parameters may be specified.\n" +- "MTD devices may be specified by their number or name. 
" +- "Optional \"vid_hdr_offs\" and \"data_offs\" parameters " +- "specify UBI VID header position and data starting " +- "position to be used by UBI.\n" +- "Example: mtd=content,1984,2048 mtd=4 - attach MTD device" +- "with name content using VID header offset 1984 and data " +- "start 2048, and MTD device number 4 using default " +- "offsets"); ++ "MTD devices may be specified by their number or name.\n" ++ "Optional \"vid_hdr_offs\" parameter specifies UBI VID " ++ "header position and data starting position to be used " ++ "by UBI.\n" ++ "Example: mtd=content,1984 mtd=4 - attach MTD device" ++ "with name \"content\" using VID header offset 1984, and " ++ "MTD device number 4 with default VID header offset."); + + MODULE_VERSION(__stringify(UBI_VERSION)); + MODULE_DESCRIPTION("UBI - Unsorted Block Images"); +diff -Nurd linux-2.6.24/drivers/mtd/ubi/cdev.c ubifs-v2.6.24/drivers/mtd/ubi/cdev.c +--- linux-2.6.24/drivers/mtd/ubi/cdev.c 2008-01-25 00:58:37.000000000 +0200 ++++ ubifs-v2.6.24/drivers/mtd/ubi/cdev.c 2009-04-07 17:14:47.000000000 +0200 +@@ -28,45 +28,22 @@ + * + * Major and minor numbers are assigned dynamically to both UBI and volume + * character devices. ++ * ++ * Well, there is the third kind of character devices - the UBI control ++ * character device, which allows to manipulate by UBI devices - create and ++ * delete them. In other words, it is used for attaching and detaching MTD ++ * devices. + */ + + #include <linux/module.h> + #include <linux/stat.h> + #include <linux/ioctl.h> + #include <linux/capability.h> ++#include <linux/uaccess.h> ++#include <linux/compat.h> + #include <mtd/ubi-user.h> +-#include <asm/uaccess.h> +-#include <asm/div64.h> + #include "ubi.h" + +-/* +- * Maximum sequence numbers of UBI and volume character device IOCTLs (direct +- * logical eraseblock erase is a debug-only feature). 
+- */ +-#define UBI_CDEV_IOC_MAX_SEQ 2 +-#ifndef CONFIG_MTD_UBI_DEBUG_USERSPACE_IO +-#define VOL_CDEV_IOC_MAX_SEQ 1 +-#else +-#define VOL_CDEV_IOC_MAX_SEQ 2 +-#endif +- +-/** +- * major_to_device - get UBI device object by character device major number. +- * @major: major number +- * +- * This function returns a pointer to the UBI device object. +- */ +-static struct ubi_device *major_to_device(int major) +-{ +- int i; +- +- for (i = 0; i < ubi_devices_cnt; i++) +- if (ubi_devices[i] && ubi_devices[i]->major == major) +- return ubi_devices[i]; +- BUG(); +- return NULL; +-} +- + /** + * get_exclusive - get exclusive access to an UBI volume. + * @desc: volume descriptor +@@ -124,18 +101,20 @@ + static int vol_cdev_open(struct inode *inode, struct file *file) + { + struct ubi_volume_desc *desc; +- const struct ubi_device *ubi = major_to_device(imajor(inode)); +- int vol_id = iminor(inode) - 1; +- int mode; ++ int vol_id = iminor(inode) - 1, mode, ubi_num; ++ ++ ubi_num = ubi_major2num(imajor(inode)); ++ if (ubi_num < 0) ++ return ubi_num; + + if (file->f_mode & FMODE_WRITE) + mode = UBI_READWRITE; + else + mode = UBI_READONLY; + +- dbg_msg("open volume %d, mode %d", vol_id, mode); ++ dbg_gen("open volume %d, mode %d", vol_id, mode); + +- desc = ubi_open_volume(ubi->ubi_num, vol_id, mode); ++ desc = ubi_open_volume(ubi_num, vol_id, mode); + if (IS_ERR(desc)) + return PTR_ERR(desc); + +@@ -148,13 +127,20 @@ + struct ubi_volume_desc *desc = file->private_data; + struct ubi_volume *vol = desc->vol; + +- dbg_msg("release volume %d, mode %d", vol->vol_id, desc->mode); ++ dbg_gen("release volume %d, mode %d", vol->vol_id, desc->mode); + + if (vol->updating) { + ubi_warn("update of volume %d not finished, volume is damaged", + vol->vol_id); ++ ubi_assert(!vol->changing_leb); + vol->updating = 0; + vfree(vol->upd_buf); ++ } else if (vol->changing_leb) { ++ dbg_gen("only %lld of %lld bytes received for atomic LEB change" ++ " for volume %d:%d, cancel", vol->upd_received, ++ 
vol->upd_bytes, vol->ubi->ubi_num, vol->vol_id); ++ vol->changing_leb = 0; ++ vfree(vol->upd_buf); + } + + ubi_close_volume(desc); +@@ -192,26 +178,35 @@ + return -EINVAL; + } + +- dbg_msg("seek volume %d, offset %lld, origin %d, new offset %lld", ++ dbg_gen("seek volume %d, offset %lld, origin %d, new offset %lld", + vol->vol_id, offset, origin, new_offset); + + file->f_pos = new_offset; + return new_offset; + } + ++static int vol_cdev_fsync(struct file *file, struct dentry *dentry, ++ int datasync) ++{ ++ struct ubi_volume_desc *desc = file->private_data; ++ struct ubi_device *ubi = desc->vol->ubi; ++ ++ return ubi_sync(ubi->ubi_num); ++} ++ ++ + static ssize_t vol_cdev_read(struct file *file, __user char *buf, size_t count, + loff_t *offp) + { + struct ubi_volume_desc *desc = file->private_data; + struct ubi_volume *vol = desc->vol; + struct ubi_device *ubi = vol->ubi; +- int err, lnum, off, len, vol_id = desc->vol->vol_id, tbuf_size; ++ int err, lnum, off, len, tbuf_size; + size_t count_save = count; + void *tbuf; +- uint64_t tmp; + +- dbg_msg("read %zd bytes from offset %lld of volume %d", +- count, *offp, vol_id); ++ dbg_gen("read %zd bytes from offset %lld of volume %d", ++ count, *offp, vol->vol_id); + + if (vol->updating) { + dbg_err("updating"); +@@ -225,7 +220,7 @@ + return 0; + + if (vol->corrupted) +- dbg_msg("read from corrupted volume %d", vol_id); ++ dbg_gen("read from corrupted volume %d", vol->vol_id); + + if (*offp + count > vol->used_bytes) + count_save = count = vol->used_bytes - *offp; +@@ -238,10 +233,7 @@ + return -ENOMEM; + + len = count > tbuf_size ? 
tbuf_size : count; +- +- tmp = *offp; +- off = do_div(tmp, vol->usable_leb_size); +- lnum = tmp; ++ lnum = div_u64_rem(*offp, vol->usable_leb_size, &off); + + do { + cond_resched(); +@@ -249,7 +241,7 @@ + if (off + len >= vol->usable_leb_size) + len = vol->usable_leb_size - off; + +- err = ubi_eba_read_leb(ubi, vol_id, lnum, tbuf, off, len, 0); ++ err = ubi_eba_read_leb(ubi, vol, lnum, tbuf, off, len, 0); + if (err) + break; + +@@ -276,12 +268,9 @@ + return err ? err : count_save - count; + } + +-#ifdef CONFIG_MTD_UBI_DEBUG_USERSPACE_IO +- + /* + * This function allows to directly write to dynamic UBI volumes, without +- * issuing the volume update operation. Available only as a debugging feature. +- * Very useful for testing UBI. ++ * issuing the volume update operation. + */ + static ssize_t vol_cdev_direct_write(struct file *file, const char __user *buf, + size_t count, loff_t *offp) +@@ -289,22 +278,21 @@ + struct ubi_volume_desc *desc = file->private_data; + struct ubi_volume *vol = desc->vol; + struct ubi_device *ubi = vol->ubi; +- int lnum, off, len, tbuf_size, vol_id = vol->vol_id, err = 0; ++ int lnum, off, len, tbuf_size, err = 0; + size_t count_save = count; + char *tbuf; +- uint64_t tmp; + +- dbg_msg("requested: write %zd bytes to offset %lld of volume %u", +- count, *offp, desc->vol->vol_id); ++ if (!vol->direct_writes) ++ return -EPERM; ++ ++ dbg_gen("requested: write %zd bytes to offset %lld of volume %u", ++ count, *offp, vol->vol_id); + + if (vol->vol_type == UBI_STATIC_VOLUME) + return -EROFS; + +- tmp = *offp; +- off = do_div(tmp, vol->usable_leb_size); +- lnum = tmp; +- +- if (off % ubi->min_io_size) { ++ lnum = div_u64_rem(*offp, vol->usable_leb_size, &off); ++ if (off & (ubi->min_io_size - 1)) { + dbg_err("unaligned position"); + return -EINVAL; + } +@@ -313,7 +301,7 @@ + count_save = count = vol->used_bytes - *offp; + + /* We can write only in fractions of the minimum I/O unit */ +- if (count % ubi->min_io_size) { ++ if (count & 
(ubi->min_io_size - 1)) { + dbg_err("unaligned write length"); + return -EINVAL; + } +@@ -339,7 +327,7 @@ + break; + } + +- err = ubi_eba_write_leb(ubi, vol_id, lnum, tbuf, off, len, ++ err = ubi_eba_write_leb(ubi, vol, lnum, tbuf, off, len, + UBI_UNKNOWN); + if (err) + break; +@@ -360,10 +348,6 @@ + return err ? err : count_save - count; + } + +-#else +-#define vol_cdev_direct_write(file, buf, count, offp) -EPERM +-#endif /* CONFIG_MTD_UBI_DEBUG_USERSPACE_IO */ +- + static ssize_t vol_cdev_write(struct file *file, const char __user *buf, + size_t count, loff_t *offp) + { +@@ -372,22 +356,32 @@ + struct ubi_volume *vol = desc->vol; + struct ubi_device *ubi = vol->ubi; + +- if (!vol->updating) ++ if (!vol->updating && !vol->changing_leb) + return vol_cdev_direct_write(file, buf, count, offp); + +- err = ubi_more_update_data(ubi, vol->vol_id, buf, count); ++ if (vol->updating) ++ err = ubi_more_update_data(ubi, vol, buf, count); ++ else ++ err = ubi_more_leb_change_data(ubi, vol, buf, count); ++ + if (err < 0) { +- ubi_err("cannot write %zd bytes of update data", count); ++ ubi_err("cannot accept more %zd bytes of data, error %d", ++ count, err); + return err; + } + + if (err) { + /* +- * Update is finished, @err contains number of actually written +- * bytes now. ++ * The operation is finished, @err contains number of actually ++ * written bytes. 
+ */ + count = err; + ++ if (vol->changing_leb) { ++ revoke_exclusive(desc, UBI_READWRITE); ++ return count; ++ } ++ + err = ubi_check_volume(ubi, vol->vol_id); + if (err < 0) + return err; +@@ -402,12 +396,11 @@ + revoke_exclusive(desc, UBI_READWRITE); + } + +- *offp += count; + return count; + } + +-static int vol_cdev_ioctl(struct inode *inode, struct file *file, +- unsigned int cmd, unsigned long arg) ++static long vol_cdev_ioctl(struct file *file, unsigned int cmd, ++ unsigned long arg) + { + int err = 0; + struct ubi_volume_desc *desc = file->private_data; +@@ -437,7 +430,8 @@ + break; + } + +- rsvd_bytes = vol->reserved_pebs * (ubi->leb_size-vol->data_pad); ++ rsvd_bytes = (long long)vol->reserved_pebs * ++ ubi->leb_size-vol->data_pad; + if (bytes < 0 || bytes > rsvd_bytes) { + err = -EINVAL; + break; +@@ -447,15 +441,49 @@ + if (err < 0) + break; + +- err = ubi_start_update(ubi, vol->vol_id, bytes); ++ err = ubi_start_update(ubi, vol, bytes); + if (bytes == 0) + revoke_exclusive(desc, UBI_READWRITE); ++ break; ++ } + +- file->f_pos = 0; ++ /* Atomic logical eraseblock change command */ ++ case UBI_IOCEBCH: ++ { ++ struct ubi_leb_change_req req; ++ ++ err = copy_from_user(&req, argp, ++ sizeof(struct ubi_leb_change_req)); ++ if (err) { ++ err = -EFAULT; ++ break; ++ } ++ ++ if (desc->mode == UBI_READONLY || ++ vol->vol_type == UBI_STATIC_VOLUME) { ++ err = -EROFS; ++ break; ++ } ++ ++ /* Validate the request */ ++ err = -EINVAL; ++ if (req.lnum < 0 || req.lnum >= vol->reserved_pebs || ++ req.bytes < 0 || req.lnum >= vol->usable_leb_size) ++ break; ++ if (req.dtype != UBI_LONGTERM && req.dtype != UBI_SHORTTERM && ++ req.dtype != UBI_UNKNOWN) ++ break; ++ ++ err = get_exclusive(desc); ++ if (err < 0) ++ break; ++ ++ err = ubi_start_leb_change(ubi, vol, &req); ++ if (req.bytes == 0) ++ revoke_exclusive(desc, UBI_READWRITE); + break; + } + +-#ifdef CONFIG_MTD_UBI_DEBUG_USERSPACE_IO + /* Logical eraseblock erasure command */ + case UBI_IOCEBER: + { +@@ -467,7 
+495,8 @@ + break; + } + +- if (desc->mode == UBI_READONLY) { ++ if (desc->mode == UBI_READONLY || ++ vol->vol_type == UBI_STATIC_VOLUME) { + err = -EROFS; + break; + } +@@ -477,26 +506,85 @@ + break; + } + +- if (vol->vol_type != UBI_DYNAMIC_VOLUME) { +- err = -EROFS; ++ dbg_gen("erase LEB %d:%d", vol->vol_id, lnum); ++ err = ubi_eba_unmap_leb(ubi, vol, lnum); ++ if (err) ++ break; ++ ++ err = ubi_wl_flush(ubi); ++ break; ++ } ++ ++ /* Logical eraseblock map command */ ++ case UBI_IOCEBMAP: ++ { ++ struct ubi_map_req req; ++ ++ err = copy_from_user(&req, argp, sizeof(struct ubi_map_req)); ++ if (err) { ++ err = -EFAULT; + break; + } ++ err = ubi_leb_map(desc, req.lnum, req.dtype); ++ break; ++ } + +- dbg_msg("erase LEB %d:%d", vol->vol_id, lnum); +- err = ubi_eba_unmap_leb(ubi, vol->vol_id, lnum); +- if (err) ++ /* Logical eraseblock un-map command */ ++ case UBI_IOCEBUNMAP: ++ { ++ int32_t lnum; ++ ++ err = get_user(lnum, (__user int32_t *)argp); ++ if (err) { ++ err = -EFAULT; + break; ++ } ++ err = ubi_leb_unmap(desc, lnum); ++ break; ++ } + +- err = ubi_wl_flush(ubi); ++ /* Check if logical eraseblock is mapped command */ ++ case UBI_IOCEBISMAP: ++ { ++ int32_t lnum; ++ ++ err = get_user(lnum, (__user int32_t *)argp); ++ if (err) { ++ err = -EFAULT; ++ break; ++ } ++ err = ubi_is_mapped(desc, lnum); ++ break; ++ } ++ ++ /* Set volume property command*/ ++ case UBI_IOCSETPROP: ++ { ++ struct ubi_set_prop_req req; ++ ++ err = copy_from_user(&req, argp, ++ sizeof(struct ubi_set_prop_req)); ++ if (err) { ++ err = -EFAULT; ++ break; ++ } ++ switch (req.property) { ++ case UBI_PROP_DIRECT_WRITE: ++ mutex_lock(&ubi->volumes_mutex); ++ desc->vol->direct_writes = !!req.value; ++ mutex_unlock(&ubi->volumes_mutex); ++ break; ++ default: ++ err = -EINVAL; ++ break; ++ } + break; + } +-#endif + + default: + err = -ENOTTY; + break; + } +- + return err; + } + +@@ -533,7 +621,7 @@ + if (req->alignment > ubi->leb_size) + goto bad; + +- n = req->alignment % ubi->min_io_size; ++ 
n = req->alignment & (ubi->min_io_size - 1); + if (req->alignment != 1 && n) + goto bad; + +@@ -542,6 +630,10 @@ + goto bad; + } + ++ n = strnlen(req->name, req->name_len + 1); ++ if (n != req->name_len) ++ goto bad; ++ + return 0; + + bad: +@@ -569,8 +661,169 @@ + return 0; + } + +-static int ubi_cdev_ioctl(struct inode *inode, struct file *file, +- unsigned int cmd, unsigned long arg) ++/** ++ * rename_volumes - rename UBI volumes. ++ * @ubi: UBI device description object ++ * @req: volumes re-name request ++ * ++ * This is a helper function for the volume re-name IOCTL which validates the ++ * the request, opens the volume and calls corresponding volumes management ++ * function. Returns zero in case of success and a negative error code in case ++ * of failure. ++ */ ++static int rename_volumes(struct ubi_device *ubi, ++ struct ubi_rnvol_req *req) ++{ ++ int i, n, err; ++ struct list_head rename_list; ++ struct ubi_rename_entry *re, *re1; ++ ++ if (req->count < 0 || req->count > UBI_MAX_RNVOL) ++ return -EINVAL; ++ ++ if (req->count == 0) ++ return 0; ++ ++ /* Validate volume IDs and names in the request */ ++ for (i = 0; i < req->count; i++) { ++ if (req->ents[i].vol_id < 0 || ++ req->ents[i].vol_id >= ubi->vtbl_slots) ++ return -EINVAL; ++ if (req->ents[i].name_len < 0) ++ return -EINVAL; ++ if (req->ents[i].name_len > UBI_VOL_NAME_MAX) ++ return -ENAMETOOLONG; ++ req->ents[i].name[req->ents[i].name_len] = '\0'; ++ n = strlen(req->ents[i].name); ++ if (n != req->ents[i].name_len) ++ err = -EINVAL; ++ } ++ ++ /* Make sure volume IDs and names are unique */ ++ for (i = 0; i < req->count - 1; i++) { ++ for (n = i + 1; n < req->count; n++) { ++ if (req->ents[i].vol_id == req->ents[n].vol_id) { ++ dbg_err("duplicated volume id %d", ++ req->ents[i].vol_id); ++ return -EINVAL; ++ } ++ if (!strcmp(req->ents[i].name, req->ents[n].name)) { ++ dbg_err("duplicated volume name \"%s\"", ++ req->ents[i].name); ++ return -EINVAL; ++ } ++ } ++ } ++ ++ /* Create the re-name 
list */ ++ INIT_LIST_HEAD(&rename_list); ++ for (i = 0; i < req->count; i++) { ++ int vol_id = req->ents[i].vol_id; ++ int name_len = req->ents[i].name_len; ++ const char *name = req->ents[i].name; ++ ++ re = kzalloc(sizeof(struct ubi_rename_entry), GFP_KERNEL); ++ if (!re) { ++ err = -ENOMEM; ++ goto out_free; ++ } ++ ++ re->desc = ubi_open_volume(ubi->ubi_num, vol_id, UBI_EXCLUSIVE); ++ if (IS_ERR(re->desc)) { ++ err = PTR_ERR(re->desc); ++ dbg_err("cannot open volume %d, error %d", vol_id, err); ++ kfree(re); ++ goto out_free; ++ } ++ ++ /* Skip this re-naming if the name does not really change */ ++ if (re->desc->vol->name_len == name_len && ++ !memcmp(re->desc->vol->name, name, name_len)) { ++ ubi_close_volume(re->desc); ++ kfree(re); ++ continue; ++ } ++ ++ re->new_name_len = name_len; ++ memcpy(re->new_name, name, name_len); ++ list_add_tail(&re->list, &rename_list); ++ dbg_msg("will rename volume %d from \"%s\" to \"%s\"", ++ vol_id, re->desc->vol->name, name); ++ } ++ ++ if (list_empty(&rename_list)) ++ return 0; ++ ++ /* Find out the volumes which have to be removed */ ++ list_for_each_entry(re, &rename_list, list) { ++ struct ubi_volume_desc *desc; ++ int no_remove_needed = 0; ++ ++ /* ++ * Volume @re->vol_id is going to be re-named to ++ * @re->new_name, while its current name is @name. If a volume ++ * with name @re->new_name currently exists, it has to be ++ * removed, unless it is also re-named in the request (@req). ++ */ ++ list_for_each_entry(re1, &rename_list, list) { ++ if (re->new_name_len == re1->desc->vol->name_len && ++ !memcmp(re->new_name, re1->desc->vol->name, ++ re1->desc->vol->name_len)) { ++ no_remove_needed = 1; ++ break; ++ } ++ } ++ ++ if (no_remove_needed) ++ continue; ++ ++ /* ++ * It seems we need to remove volume with name @re->new_name, ++ * if it exists. 
++ */ ++ desc = ubi_open_volume_nm(ubi->ubi_num, re->new_name, ++ UBI_EXCLUSIVE); ++ if (IS_ERR(desc)) { ++ err = PTR_ERR(desc); ++ if (err == -ENODEV) ++ /* Re-naming into a non-existing volume name */ ++ continue; ++ ++ /* The volume exists but busy, or an error occurred */ ++ dbg_err("cannot open volume \"%s\", error %d", ++ re->new_name, err); ++ goto out_free; ++ } ++ ++ re = kzalloc(sizeof(struct ubi_rename_entry), GFP_KERNEL); ++ if (!re) { ++ err = -ENOMEM; ++ ubi_close_volume(desc); ++ goto out_free; ++ } ++ ++ re->remove = 1; ++ re->desc = desc; ++ list_add(&re->list, &rename_list); ++ dbg_msg("will remove volume %d, name \"%s\"", ++ re->desc->vol->vol_id, re->desc->vol->name); ++ } ++ ++ mutex_lock(&ubi->volumes_mutex); ++ err = ubi_rename_volumes(ubi, &rename_list); ++ mutex_unlock(&ubi->volumes_mutex); ++ ++out_free: ++ list_for_each_entry_safe(re, re1, &rename_list, list) { ++ ubi_close_volume(re->desc); ++ list_del(&re->list); ++ kfree(re); ++ } ++ return err; ++} ++ ++static long ubi_cdev_ioctl(struct file *file, unsigned int cmd, ++ unsigned long arg) + { + int err = 0; + struct ubi_device *ubi; +@@ -580,9 +833,9 @@ + if (!capable(CAP_SYS_RESOURCE)) + return -EPERM; + +- ubi = major_to_device(imajor(inode)); +- if (IS_ERR(ubi)) +- return PTR_ERR(ubi); ++ ubi = ubi_get_by_major(imajor(file->f_mapping->host)); ++ if (!ubi) ++ return -ENODEV; + + switch (cmd) { + /* Create volume command */ +@@ -590,21 +843,21 @@ + { + struct ubi_mkvol_req req; + +- dbg_msg("create volume"); +- err = copy_from_user(&req, argp, +- sizeof(struct ubi_mkvol_req)); ++ dbg_gen("create volume"); ++ err = copy_from_user(&req, argp, sizeof(struct ubi_mkvol_req)); + if (err) { + err = -EFAULT; + break; + } + ++ req.name[req.name_len] = '\0'; + err = verify_mkvol_req(ubi, &req); + if (err) + break; + +- req.name[req.name_len] = '\0'; +- ++ mutex_lock(&ubi->volumes_mutex); + err = ubi_create_volume(ubi, &req); ++ mutex_unlock(&ubi->volumes_mutex); + if (err) + break; + +@@ -620,7 
+873,7 @@ + { + int vol_id; + +- dbg_msg("remove volume"); ++ dbg_gen("remove volume"); + err = get_user(vol_id, (__user int32_t *)argp); + if (err) { + err = -EFAULT; +@@ -633,10 +886,16 @@ + break; + } + +- err = ubi_remove_volume(desc); +- if (err) +- ubi_close_volume(desc); ++ mutex_lock(&ubi->volumes_mutex); ++ err = ubi_remove_volume(desc, 0); ++ mutex_unlock(&ubi->volumes_mutex); + ++ /* ++ * The volume is deleted (unless an error occurred), and the ++ * 'struct ubi_volume' object will be freed when ++ * 'ubi_close_volume()' will call 'put_device()'. ++ */ ++ ubi_close_volume(desc); + break; + } + +@@ -644,12 +903,10 @@ + case UBI_IOCRSVOL: + { + int pebs; +- uint64_t tmp; + struct ubi_rsvol_req req; + +- dbg_msg("re-size volume"); +- err = copy_from_user(&req, argp, +- sizeof(struct ubi_rsvol_req)); ++ dbg_gen("re-size volume"); ++ err = copy_from_user(&req, argp, sizeof(struct ubi_rsvol_req)); + if (err) { + err = -EFAULT; + break; +@@ -665,37 +922,182 @@ + break; + } + +- tmp = req.bytes; +- pebs = !!do_div(tmp, desc->vol->usable_leb_size); +- pebs += tmp; ++ pebs = div_u64(req.bytes + desc->vol->usable_leb_size - 1, ++ desc->vol->usable_leb_size); + ++ mutex_lock(&ubi->volumes_mutex); + err = ubi_resize_volume(desc, pebs); ++ mutex_unlock(&ubi->volumes_mutex); + ubi_close_volume(desc); + break; + } + ++ /* Re-name volumes command */ ++ case UBI_IOCRNVOL: ++ { ++ struct ubi_rnvol_req *req; ++ ++ dbg_msg("re-name volumes"); ++ req = kmalloc(sizeof(struct ubi_rnvol_req), GFP_KERNEL); ++ if (!req) { ++ err = -ENOMEM; ++ break; ++ }; ++ ++ err = copy_from_user(req, argp, sizeof(struct ubi_rnvol_req)); ++ if (err) { ++ err = -EFAULT; ++ kfree(req); ++ break; ++ } ++ ++ mutex_lock(&ubi->mult_mutex); ++ err = rename_volumes(ubi, req); ++ mutex_unlock(&ubi->mult_mutex); ++ kfree(req); ++ break; ++ } ++ + default: + err = -ENOTTY; + break; + } + ++ ubi_put_device(ubi); + return err; + } + ++static long ctrl_cdev_ioctl(struct file *file, unsigned int cmd, ++ 
unsigned long arg) ++{ ++ int err = 0; ++ void __user *argp = (void __user *)arg; ++ ++ if (!capable(CAP_SYS_RESOURCE)) ++ return -EPERM; ++ ++ switch (cmd) { ++ /* Attach an MTD device command */ ++ case UBI_IOCATT: ++ { ++ struct ubi_attach_req req; ++ struct mtd_info *mtd; ++ ++ dbg_gen("attach MTD device"); ++ err = copy_from_user(&req, argp, sizeof(struct ubi_attach_req)); ++ if (err) { ++ err = -EFAULT; ++ break; ++ } ++ ++ if (req.mtd_num < 0 || ++ (req.ubi_num < 0 && req.ubi_num != UBI_DEV_NUM_AUTO)) { ++ err = -EINVAL; ++ break; ++ } ++ ++ mtd = get_mtd_device(NULL, req.mtd_num); ++ if (IS_ERR(mtd)) { ++ err = PTR_ERR(mtd); ++ break; ++ } ++ ++ /* ++ * Note, further request verification is done by ++ * 'ubi_attach_mtd_dev()'. ++ */ ++ mutex_lock(&ubi_devices_mutex); ++ err = ubi_attach_mtd_dev(mtd, req.ubi_num, req.vid_hdr_offset); ++ mutex_unlock(&ubi_devices_mutex); ++ if (err < 0) ++ put_mtd_device(mtd); ++ else ++ /* @err contains UBI device number */ ++ err = put_user(err, (__user int32_t *)argp); ++ ++ break; ++ } ++ ++ /* Detach an MTD device command */ ++ case UBI_IOCDET: ++ { ++ int ubi_num; ++ ++ dbg_gen("dettach MTD device"); ++ err = get_user(ubi_num, (__user int32_t *)argp); ++ if (err) { ++ err = -EFAULT; ++ break; ++ } ++ ++ mutex_lock(&ubi_devices_mutex); ++ err = ubi_detach_mtd_dev(ubi_num, 0); ++ mutex_unlock(&ubi_devices_mutex); ++ break; ++ } ++ ++ default: ++ err = -ENOTTY; ++ break; ++ } ++ ++ return err; ++} ++ ++#ifdef CONFIG_COMPAT ++static long vol_cdev_compat_ioctl(struct file *file, unsigned int cmd, ++ unsigned long arg) ++{ ++ unsigned long translated_arg = (unsigned long)compat_ptr(arg); ++ ++ return vol_cdev_ioctl(file, cmd, translated_arg); ++} ++ ++static long ubi_cdev_compat_ioctl(struct file *file, unsigned int cmd, ++ unsigned long arg) ++{ ++ unsigned long translated_arg = (unsigned long)compat_ptr(arg); ++ ++ return ubi_cdev_ioctl(file, cmd, translated_arg); ++} ++ ++static long ctrl_cdev_compat_ioctl(struct file 
*file, unsigned int cmd, ++ unsigned long arg) ++{ ++ unsigned long translated_arg = (unsigned long)compat_ptr(arg); ++ ++ return ctrl_cdev_ioctl(file, cmd, translated_arg); ++} ++#else ++#define vol_cdev_compat_ioctl NULL ++#define ubi_cdev_compat_ioctl NULL ++#define ctrl_cdev_compat_ioctl NULL ++#endif ++ ++/* UBI volume character device operations */ ++const struct file_operations ubi_vol_cdev_operations = { ++ .owner = THIS_MODULE, ++ .open = vol_cdev_open, ++ .release = vol_cdev_release, ++ .llseek = vol_cdev_llseek, ++ .read = vol_cdev_read, ++ .write = vol_cdev_write, ++ .fsync = vol_cdev_fsync, ++ .unlocked_ioctl = vol_cdev_ioctl, ++ .compat_ioctl = vol_cdev_compat_ioctl, ++}; ++ + /* UBI character device operations */ +-struct file_operations ubi_cdev_operations = { +- .owner = THIS_MODULE, +- .ioctl = ubi_cdev_ioctl, +- .llseek = no_llseek, ++const struct file_operations ubi_cdev_operations = { ++ .owner = THIS_MODULE, ++ .llseek = no_llseek, ++ .unlocked_ioctl = ubi_cdev_ioctl, ++ .compat_ioctl = ubi_cdev_compat_ioctl, + }; + +-/* UBI volume character device operations */ +-struct file_operations ubi_vol_cdev_operations = { +- .owner = THIS_MODULE, +- .open = vol_cdev_open, +- .release = vol_cdev_release, +- .llseek = vol_cdev_llseek, +- .read = vol_cdev_read, +- .write = vol_cdev_write, +- .ioctl = vol_cdev_ioctl, ++/* UBI control character device operations */ ++const struct file_operations ubi_ctrl_cdev_operations = { ++ .owner = THIS_MODULE, ++ .unlocked_ioctl = ctrl_cdev_ioctl, ++ .compat_ioctl = ctrl_cdev_compat_ioctl, + }; +diff -Nurd linux-2.6.24/drivers/mtd/ubi/debug.c ubifs-v2.6.24/drivers/mtd/ubi/debug.c +--- linux-2.6.24/drivers/mtd/ubi/debug.c 2008-01-25 00:58:37.000000000 +0200 ++++ ubifs-v2.6.24/drivers/mtd/ubi/debug.c 2009-04-07 17:14:47.000000000 +0200 +@@ -24,7 +24,7 @@ + * changes. 
+ */ + +-#ifdef CONFIG_MTD_UBI_DEBUG_MSG ++#ifdef CONFIG_MTD_UBI_DEBUG + + #include "ubi.h" + +@@ -34,14 +34,19 @@ + */ + void ubi_dbg_dump_ec_hdr(const struct ubi_ec_hdr *ec_hdr) + { +- dbg_msg("erase counter header dump:"); +- dbg_msg("magic %#08x", be32_to_cpu(ec_hdr->magic)); +- dbg_msg("version %d", (int)ec_hdr->version); +- dbg_msg("ec %llu", (long long)be64_to_cpu(ec_hdr->ec)); +- dbg_msg("vid_hdr_offset %d", be32_to_cpu(ec_hdr->vid_hdr_offset)); +- dbg_msg("data_offset %d", be32_to_cpu(ec_hdr->data_offset)); +- dbg_msg("hdr_crc %#08x", be32_to_cpu(ec_hdr->hdr_crc)); +- dbg_msg("erase counter header hexdump:"); ++ printk(KERN_DEBUG "Erase counter header dump:\n"); ++ printk(KERN_DEBUG "\tmagic %#08x\n", ++ be32_to_cpu(ec_hdr->magic)); ++ printk(KERN_DEBUG "\tversion %d\n", (int)ec_hdr->version); ++ printk(KERN_DEBUG "\tec %llu\n", ++ (long long)be64_to_cpu(ec_hdr->ec)); ++ printk(KERN_DEBUG "\tvid_hdr_offset %d\n", ++ be32_to_cpu(ec_hdr->vid_hdr_offset)); ++ printk(KERN_DEBUG "\tdata_offset %d\n", ++ be32_to_cpu(ec_hdr->data_offset)); ++ printk(KERN_DEBUG "\thdr_crc %#08x\n", ++ be32_to_cpu(ec_hdr->hdr_crc)); ++ printk(KERN_DEBUG "erase counter header hexdump:\n"); + print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, + ec_hdr, UBI_EC_HDR_SIZE, 1); + } +@@ -52,22 +57,23 @@ + */ + void ubi_dbg_dump_vid_hdr(const struct ubi_vid_hdr *vid_hdr) + { +- dbg_msg("volume identifier header dump:"); +- dbg_msg("magic %08x", be32_to_cpu(vid_hdr->magic)); +- dbg_msg("version %d", (int)vid_hdr->version); +- dbg_msg("vol_type %d", (int)vid_hdr->vol_type); +- dbg_msg("copy_flag %d", (int)vid_hdr->copy_flag); +- dbg_msg("compat %d", (int)vid_hdr->compat); +- dbg_msg("vol_id %d", be32_to_cpu(vid_hdr->vol_id)); +- dbg_msg("lnum %d", be32_to_cpu(vid_hdr->lnum)); +- dbg_msg("leb_ver %u", be32_to_cpu(vid_hdr->leb_ver)); +- dbg_msg("data_size %d", be32_to_cpu(vid_hdr->data_size)); +- dbg_msg("used_ebs %d", be32_to_cpu(vid_hdr->used_ebs)); +- dbg_msg("data_pad %d", 
be32_to_cpu(vid_hdr->data_pad)); +- dbg_msg("sqnum %llu", ++ printk(KERN_DEBUG "Volume identifier header dump:\n"); ++ printk(KERN_DEBUG "\tmagic %08x\n", be32_to_cpu(vid_hdr->magic)); ++ printk(KERN_DEBUG "\tversion %d\n", (int)vid_hdr->version); ++ printk(KERN_DEBUG "\tvol_type %d\n", (int)vid_hdr->vol_type); ++ printk(KERN_DEBUG "\tcopy_flag %d\n", (int)vid_hdr->copy_flag); ++ printk(KERN_DEBUG "\tcompat %d\n", (int)vid_hdr->compat); ++ printk(KERN_DEBUG "\tvol_id %d\n", be32_to_cpu(vid_hdr->vol_id)); ++ printk(KERN_DEBUG "\tlnum %d\n", be32_to_cpu(vid_hdr->lnum)); ++ printk(KERN_DEBUG "\tdata_size %d\n", be32_to_cpu(vid_hdr->data_size)); ++ printk(KERN_DEBUG "\tused_ebs %d\n", be32_to_cpu(vid_hdr->used_ebs)); ++ printk(KERN_DEBUG "\tdata_pad %d\n", be32_to_cpu(vid_hdr->data_pad)); ++ printk(KERN_DEBUG "\tsqnum %llu\n", + (unsigned long long)be64_to_cpu(vid_hdr->sqnum)); +- dbg_msg("hdr_crc %08x", be32_to_cpu(vid_hdr->hdr_crc)); +- dbg_msg("volume identifier header hexdump:"); ++ printk(KERN_DEBUG "\thdr_crc %08x\n", be32_to_cpu(vid_hdr->hdr_crc)); ++ printk(KERN_DEBUG "Volume identifier header hexdump:\n"); ++ print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, ++ vid_hdr, UBI_VID_HDR_SIZE, 1); + } + + /** +@@ -76,27 +82,27 @@ + */ + void ubi_dbg_dump_vol_info(const struct ubi_volume *vol) + { +- dbg_msg("volume information dump:"); +- dbg_msg("vol_id %d", vol->vol_id); +- dbg_msg("reserved_pebs %d", vol->reserved_pebs); +- dbg_msg("alignment %d", vol->alignment); +- dbg_msg("data_pad %d", vol->data_pad); +- dbg_msg("vol_type %d", vol->vol_type); +- dbg_msg("name_len %d", vol->name_len); +- dbg_msg("usable_leb_size %d", vol->usable_leb_size); +- dbg_msg("used_ebs %d", vol->used_ebs); +- dbg_msg("used_bytes %lld", vol->used_bytes); +- dbg_msg("last_eb_bytes %d", vol->last_eb_bytes); +- dbg_msg("corrupted %d", vol->corrupted); +- dbg_msg("upd_marker %d", vol->upd_marker); ++ printk(KERN_DEBUG "Volume information dump:\n"); ++ printk(KERN_DEBUG "\tvol_id 
%d\n", vol->vol_id); ++ printk(KERN_DEBUG "\treserved_pebs %d\n", vol->reserved_pebs); ++ printk(KERN_DEBUG "\talignment %d\n", vol->alignment); ++ printk(KERN_DEBUG "\tdata_pad %d\n", vol->data_pad); ++ printk(KERN_DEBUG "\tvol_type %d\n", vol->vol_type); ++ printk(KERN_DEBUG "\tname_len %d\n", vol->name_len); ++ printk(KERN_DEBUG "\tusable_leb_size %d\n", vol->usable_leb_size); ++ printk(KERN_DEBUG "\tused_ebs %d\n", vol->used_ebs); ++ printk(KERN_DEBUG "\tused_bytes %lld\n", vol->used_bytes); ++ printk(KERN_DEBUG "\tlast_eb_bytes %d\n", vol->last_eb_bytes); ++ printk(KERN_DEBUG "\tcorrupted %d\n", vol->corrupted); ++ printk(KERN_DEBUG "\tupd_marker %d\n", vol->upd_marker); + + if (vol->name_len <= UBI_VOL_NAME_MAX && + strnlen(vol->name, vol->name_len + 1) == vol->name_len) { +- dbg_msg("name %s", vol->name); ++ printk(KERN_DEBUG "\tname %s\n", vol->name); + } else { +- dbg_msg("the 1st 5 characters of the name: %c%c%c%c%c", +- vol->name[0], vol->name[1], vol->name[2], +- vol->name[3], vol->name[4]); ++ printk(KERN_DEBUG "\t1st 5 characters of name: %c%c%c%c%c\n", ++ vol->name[0], vol->name[1], vol->name[2], ++ vol->name[3], vol->name[4]); + } + } + +@@ -109,28 +115,29 @@ + { + int name_len = be16_to_cpu(r->name_len); + +- dbg_msg("volume table record %d dump:", idx); +- dbg_msg("reserved_pebs %d", be32_to_cpu(r->reserved_pebs)); +- dbg_msg("alignment %d", be32_to_cpu(r->alignment)); +- dbg_msg("data_pad %d", be32_to_cpu(r->data_pad)); +- dbg_msg("vol_type %d", (int)r->vol_type); +- dbg_msg("upd_marker %d", (int)r->upd_marker); +- dbg_msg("name_len %d", name_len); ++ printk(KERN_DEBUG "Volume table record %d dump:\n", idx); ++ printk(KERN_DEBUG "\treserved_pebs %d\n", ++ be32_to_cpu(r->reserved_pebs)); ++ printk(KERN_DEBUG "\talignment %d\n", be32_to_cpu(r->alignment)); ++ printk(KERN_DEBUG "\tdata_pad %d\n", be32_to_cpu(r->data_pad)); ++ printk(KERN_DEBUG "\tvol_type %d\n", (int)r->vol_type); ++ printk(KERN_DEBUG "\tupd_marker %d\n", (int)r->upd_marker); ++ 
printk(KERN_DEBUG "\tname_len %d\n", name_len); + + if (r->name[0] == '\0') { +- dbg_msg("name NULL"); ++ printk(KERN_DEBUG "\tname NULL\n"); + return; + } + + if (name_len <= UBI_VOL_NAME_MAX && + strnlen(&r->name[0], name_len + 1) == name_len) { +- dbg_msg("name %s", &r->name[0]); ++ printk(KERN_DEBUG "\tname %s\n", &r->name[0]); + } else { +- dbg_msg("1st 5 characters of the name: %c%c%c%c%c", ++ printk(KERN_DEBUG "\t1st 5 characters of name: %c%c%c%c%c\n", + r->name[0], r->name[1], r->name[2], r->name[3], + r->name[4]); + } +- dbg_msg("crc %#08x", be32_to_cpu(r->crc)); ++ printk(KERN_DEBUG "\tcrc %#08x\n", be32_to_cpu(r->crc)); + } + + /** +@@ -139,15 +146,15 @@ + */ + void ubi_dbg_dump_sv(const struct ubi_scan_volume *sv) + { +- dbg_msg("volume scanning information dump:"); +- dbg_msg("vol_id %d", sv->vol_id); +- dbg_msg("highest_lnum %d", sv->highest_lnum); +- dbg_msg("leb_count %d", sv->leb_count); +- dbg_msg("compat %d", sv->compat); +- dbg_msg("vol_type %d", sv->vol_type); +- dbg_msg("used_ebs %d", sv->used_ebs); +- dbg_msg("last_data_size %d", sv->last_data_size); +- dbg_msg("data_pad %d", sv->data_pad); ++ printk(KERN_DEBUG "Volume scanning information dump:\n"); ++ printk(KERN_DEBUG "\tvol_id %d\n", sv->vol_id); ++ printk(KERN_DEBUG "\thighest_lnum %d\n", sv->highest_lnum); ++ printk(KERN_DEBUG "\tleb_count %d\n", sv->leb_count); ++ printk(KERN_DEBUG "\tcompat %d\n", sv->compat); ++ printk(KERN_DEBUG "\tvol_type %d\n", sv->vol_type); ++ printk(KERN_DEBUG "\tused_ebs %d\n", sv->used_ebs); ++ printk(KERN_DEBUG "\tlast_data_size %d\n", sv->last_data_size); ++ printk(KERN_DEBUG "\tdata_pad %d\n", sv->data_pad); + } + + /** +@@ -157,14 +164,13 @@ + */ + void ubi_dbg_dump_seb(const struct ubi_scan_leb *seb, int type) + { +- dbg_msg("eraseblock scanning information dump:"); +- dbg_msg("ec %d", seb->ec); +- dbg_msg("pnum %d", seb->pnum); ++ printk(KERN_DEBUG "eraseblock scanning information dump:\n"); ++ printk(KERN_DEBUG "\tec %d\n", seb->ec); ++ 
printk(KERN_DEBUG "\tpnum %d\n", seb->pnum); + if (type == 0) { +- dbg_msg("lnum %d", seb->lnum); +- dbg_msg("scrub %d", seb->scrub); +- dbg_msg("sqnum %llu", seb->sqnum); +- dbg_msg("leb_ver %u", seb->leb_ver); ++ printk(KERN_DEBUG "\tlnum %d\n", seb->lnum); ++ printk(KERN_DEBUG "\tscrub %d\n", seb->scrub); ++ printk(KERN_DEBUG "\tsqnum %llu\n", seb->sqnum); + } + } + +@@ -176,16 +182,16 @@ + { + char nm[17]; + +- dbg_msg("volume creation request dump:"); +- dbg_msg("vol_id %d", req->vol_id); +- dbg_msg("alignment %d", req->alignment); +- dbg_msg("bytes %lld", (long long)req->bytes); +- dbg_msg("vol_type %d", req->vol_type); +- dbg_msg("name_len %d", req->name_len); ++ printk(KERN_DEBUG "Volume creation request dump:\n"); ++ printk(KERN_DEBUG "\tvol_id %d\n", req->vol_id); ++ printk(KERN_DEBUG "\talignment %d\n", req->alignment); ++ printk(KERN_DEBUG "\tbytes %lld\n", (long long)req->bytes); ++ printk(KERN_DEBUG "\tvol_type %d\n", req->vol_type); ++ printk(KERN_DEBUG "\tname_len %d\n", req->name_len); + + memcpy(nm, req->name, 16); + nm[16] = 0; +- dbg_msg("the 1st 16 characters of the name: %s", nm); ++ printk(KERN_DEBUG "\t1st 16 characters of name: %s\n", nm); + } + +-#endif /* CONFIG_MTD_UBI_DEBUG_MSG */ ++#endif /* CONFIG_MTD_UBI_DEBUG */ +diff -Nurd linux-2.6.24/drivers/mtd/ubi/debug.h ubifs-v2.6.24/drivers/mtd/ubi/debug.h +--- linux-2.6.24/drivers/mtd/ubi/debug.h 2008-01-25 00:58:37.000000000 +0200 ++++ ubifs-v2.6.24/drivers/mtd/ubi/debug.h 2009-04-07 17:14:47.000000000 +0200 +@@ -24,23 +24,19 @@ + #ifdef CONFIG_MTD_UBI_DEBUG + #include <linux/random.h> + +-#define ubi_assert(expr) BUG_ON(!(expr)) + #define dbg_err(fmt, ...) ubi_err(fmt, ##__VA_ARGS__) +-#else +-#define ubi_assert(expr) ({}) +-#define dbg_err(fmt, ...) 
({}) +-#endif + +-#ifdef CONFIG_MTD_UBI_DEBUG_DISABLE_BGT +-#define DBG_DISABLE_BGT 1 +-#else +-#define DBG_DISABLE_BGT 0 +-#endif ++#define ubi_assert(expr) do { \ ++ if (unlikely(!(expr))) { \ ++ printk(KERN_CRIT "UBI assert failed in %s at %u (pid %d)\n", \ ++ __func__, __LINE__, current->pid); \ ++ ubi_dbg_dump_stack(); \ ++ } \ ++} while (0) + +-#ifdef CONFIG_MTD_UBI_DEBUG_MSG +-/* Generic debugging message */ +-#define dbg_msg(fmt, ...) \ +- printk(KERN_DEBUG "UBI DBG: %s: " fmt "\n", __FUNCTION__, ##__VA_ARGS__) ++#define dbg_msg(fmt, ...) \ ++ printk(KERN_DEBUG "UBI DBG (pid %d): %s: " fmt "\n", \ ++ current->pid, __FUNCTION__, ##__VA_ARGS__) + + #define ubi_dbg_dump_stack() dump_stack() + +@@ -60,54 +56,47 @@ + void ubi_dbg_dump_seb(const struct ubi_scan_leb *seb, int type); + void ubi_dbg_dump_mkvol_req(const struct ubi_mkvol_req *req); + ++#ifdef CONFIG_MTD_UBI_DEBUG_MSG ++/* General debugging messages */ ++#define dbg_gen(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) + #else +- +-#define dbg_msg(fmt, ...) ({}) +-#define ubi_dbg_dump_stack() ({}) +-#define ubi_dbg_dump_ec_hdr(ec_hdr) ({}) +-#define ubi_dbg_dump_vid_hdr(vid_hdr) ({}) +-#define ubi_dbg_dump_vol_info(vol) ({}) +-#define ubi_dbg_dump_vtbl_record(r, idx) ({}) +-#define ubi_dbg_dump_sv(sv) ({}) +-#define ubi_dbg_dump_seb(seb, type) ({}) +-#define ubi_dbg_dump_mkvol_req(req) ({}) +- +-#endif /* CONFIG_MTD_UBI_DEBUG_MSG */ ++#define dbg_gen(fmt, ...) ({}) ++#endif + + #ifdef CONFIG_MTD_UBI_DEBUG_MSG_EBA +-/* Messages from the eraseblock association unit */ +-#define dbg_eba(fmt, ...) \ +- printk(KERN_DEBUG "UBI DBG eba: %s: " fmt "\n", __FUNCTION__, \ +- ##__VA_ARGS__) ++/* Messages from the eraseblock association sub-system */ ++#define dbg_eba(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) + #else + #define dbg_eba(fmt, ...) ({}) + #endif + + #ifdef CONFIG_MTD_UBI_DEBUG_MSG_WL +-/* Messages from the wear-leveling unit */ +-#define dbg_wl(fmt, ...) 
\ +- printk(KERN_DEBUG "UBI DBG wl: %s: " fmt "\n", __FUNCTION__, \ +- ##__VA_ARGS__) ++/* Messages from the wear-leveling sub-system */ ++#define dbg_wl(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) + #else + #define dbg_wl(fmt, ...) ({}) + #endif + + #ifdef CONFIG_MTD_UBI_DEBUG_MSG_IO +-/* Messages from the input/output unit */ +-#define dbg_io(fmt, ...) \ +- printk(KERN_DEBUG "UBI DBG io: %s: " fmt "\n", __FUNCTION__, \ +- ##__VA_ARGS__) ++/* Messages from the input/output sub-system */ ++#define dbg_io(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) + #else + #define dbg_io(fmt, ...) ({}) + #endif + + #ifdef CONFIG_MTD_UBI_DEBUG_MSG_BLD + /* Initialization and build messages */ +-#define dbg_bld(fmt, ...) \ +- printk(KERN_DEBUG "UBI DBG bld: %s: " fmt "\n", __FUNCTION__, \ +- ##__VA_ARGS__) ++#define dbg_bld(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) ++#define UBI_IO_DEBUG 1 + #else + #define dbg_bld(fmt, ...) ({}) ++#define UBI_IO_DEBUG 0 ++#endif ++ ++#ifdef CONFIG_MTD_UBI_DEBUG_DISABLE_BGT ++#define DBG_DISABLE_BGT 1 ++#else ++#define DBG_DISABLE_BGT 0 + #endif + + #ifdef CONFIG_MTD_UBI_DEBUG_EMULATE_BITFLIPS +@@ -154,4 +143,50 @@ + #define ubi_dbg_is_erase_failure() 0 + #endif + ++#else ++ ++#define ubi_assert(expr) ({}) ++#define dbg_err(fmt, ...) ({}) ++#define dbg_msg(fmt, ...) ({}) ++#define dbg_gen(fmt, ...) ({}) ++#define dbg_eba(fmt, ...) ({}) ++#define dbg_wl(fmt, ...) ({}) ++#define dbg_io(fmt, ...) ({}) ++#define dbg_bld(fmt, ...) 
({}) ++#define ubi_dbg_dump_stack() ({}) ++#define ubi_dbg_dump_ec_hdr(ec_hdr) ({}) ++#define ubi_dbg_dump_vid_hdr(vid_hdr) ({}) ++#define ubi_dbg_dump_vol_info(vol) ({}) ++#define ubi_dbg_dump_vtbl_record(r, idx) ({}) ++#define ubi_dbg_dump_sv(sv) ({}) ++#define ubi_dbg_dump_seb(seb, type) ({}) ++#define ubi_dbg_dump_mkvol_req(req) ({}) ++ ++#define UBI_IO_DEBUG 0 ++#define DBG_DISABLE_BGT 0 ++#define ubi_dbg_is_bitflip() 0 ++#define ubi_dbg_is_write_failure() 0 ++#define ubi_dbg_is_erase_failure() 0 ++ ++#endif /* !CONFIG_MTD_UBI_DEBUG */ ++ ++/* ++ * Some compatibility stuff goes here. ++ */ ++ ++#include <asm/div64.h> ++ ++static inline uint64_t div_u64(uint64_t dividend, uint64_t divisor) ++{ ++ do_div(dividend, divisor); ++ return dividend; ++} ++ ++static inline uint64_t div_u64_rem(uint64_t dividend, uint32_t divisor, ++ uint32_t *remainder) ++{ ++ *remainder = do_div(dividend, divisor); ++ return dividend; ++} ++ + #endif /* !__UBI_DEBUG_H__ */ +diff -Nurd linux-2.6.24/drivers/mtd/ubi/eba.c ubifs-v2.6.24/drivers/mtd/ubi/eba.c +--- linux-2.6.24/drivers/mtd/ubi/eba.c 2008-01-25 00:58:37.000000000 +0200 ++++ ubifs-v2.6.24/drivers/mtd/ubi/eba.c 2009-04-07 17:14:47.000000000 +0200 +@@ -19,20 +19,20 @@ + */ + + /* +- * The UBI Eraseblock Association (EBA) unit. ++ * The UBI Eraseblock Association (EBA) sub-system. + * +- * This unit is responsible for I/O to/from logical eraseblock. ++ * This sub-system is responsible for I/O to/from logical eraseblock. + * + * Although in this implementation the EBA table is fully kept and managed in + * RAM, which assumes poor scalability, it might be (partially) maintained on + * flash in future implementations. + * +- * The EBA unit implements per-logical eraseblock locking. Before accessing a +- * logical eraseblock it is locked for reading or writing. The per-logical +- * eraseblock locking is implemented by means of the lock tree. The lock tree +- * is an RB-tree which refers all the currently locked logical eraseblocks. 
The +- * lock tree elements are &struct ltree_entry objects. They are indexed by +- * (@vol_id, @lnum) pairs. ++ * The EBA sub-system implements per-logical eraseblock locking. Before ++ * accessing a logical eraseblock it is locked for reading or writing. The ++ * per-logical eraseblock locking is implemented by means of the lock tree. The ++ * lock tree is an RB-tree which refers all the currently locked logical ++ * eraseblocks. The lock tree elements are &struct ubi_ltree_entry objects. ++ * They are indexed by (@vol_id, @lnum) pairs. + * + * EBA also maintains the global sequence counter which is incremented each + * time a logical eraseblock is mapped to a physical eraseblock and it is +@@ -50,29 +50,6 @@ + #define EBA_RESERVED_PEBS 1 + + /** +- * struct ltree_entry - an entry in the lock tree. +- * @rb: links RB-tree nodes +- * @vol_id: volume ID of the locked logical eraseblock +- * @lnum: locked logical eraseblock number +- * @users: how many tasks are using this logical eraseblock or wait for it +- * @mutex: read/write mutex to implement read/write access serialization to +- * the (@vol_id, @lnum) logical eraseblock +- * +- * When a logical eraseblock is being locked - corresponding &struct ltree_entry +- * object is inserted to the lock tree (@ubi->ltree). +- */ +-struct ltree_entry { +- struct rb_node rb; +- int vol_id; +- int lnum; +- int users; +- struct rw_semaphore mutex; +-}; +- +-/* Slab cache for lock-tree entries */ +-static struct kmem_cache *ltree_slab; +- +-/** + * next_sqnum - get next sequence number. 
+ * @ubi: UBI device description object + * +@@ -101,7 +78,7 @@ + */ + static int ubi_get_compat(const struct ubi_device *ubi, int vol_id) + { +- if (vol_id == UBI_LAYOUT_VOL_ID) ++ if (vol_id == UBI_LAYOUT_VOLUME_ID) + return UBI_LAYOUT_VOLUME_COMPAT; + return 0; + } +@@ -112,20 +89,20 @@ + * @vol_id: volume ID + * @lnum: logical eraseblock number + * +- * This function returns a pointer to the corresponding &struct ltree_entry ++ * This function returns a pointer to the corresponding &struct ubi_ltree_entry + * object if the logical eraseblock is locked and %NULL if it is not. + * @ubi->ltree_lock has to be locked. + */ +-static struct ltree_entry *ltree_lookup(struct ubi_device *ubi, int vol_id, +- int lnum) ++static struct ubi_ltree_entry *ltree_lookup(struct ubi_device *ubi, int vol_id, ++ int lnum) + { + struct rb_node *p; + + p = ubi->ltree.rb_node; + while (p) { +- struct ltree_entry *le; ++ struct ubi_ltree_entry *le; + +- le = rb_entry(p, struct ltree_entry, rb); ++ le = rb_entry(p, struct ubi_ltree_entry, rb); + + if (vol_id < le->vol_id) + p = p->rb_left; +@@ -155,15 +132,17 @@ + * Returns pointer to the lock tree entry or %-ENOMEM if memory allocation + * failed. 
+ */ +-static struct ltree_entry *ltree_add_entry(struct ubi_device *ubi, int vol_id, +- int lnum) ++static struct ubi_ltree_entry *ltree_add_entry(struct ubi_device *ubi, ++ int vol_id, int lnum) + { +- struct ltree_entry *le, *le1, *le_free; ++ struct ubi_ltree_entry *le, *le1, *le_free; + +- le = kmem_cache_alloc(ltree_slab, GFP_NOFS); ++ le = kmalloc(sizeof(struct ubi_ltree_entry), GFP_NOFS); + if (!le) + return ERR_PTR(-ENOMEM); + ++ le->users = 0; ++ init_rwsem(&le->mutex); + le->vol_id = vol_id; + le->lnum = lnum; + +@@ -189,7 +168,7 @@ + p = &ubi->ltree.rb_node; + while (*p) { + parent = *p; +- le1 = rb_entry(parent, struct ltree_entry, rb); ++ le1 = rb_entry(parent, struct ubi_ltree_entry, rb); + + if (vol_id < le1->vol_id) + p = &(*p)->rb_left; +@@ -210,9 +189,7 @@ + le->users += 1; + spin_unlock(&ubi->ltree_lock); + +- if (le_free) +- kmem_cache_free(ltree_slab, le_free); +- ++ kfree(le_free); + return le; + } + +@@ -227,7 +204,7 @@ + */ + static int leb_read_lock(struct ubi_device *ubi, int vol_id, int lnum) + { +- struct ltree_entry *le; ++ struct ubi_ltree_entry *le; + + le = ltree_add_entry(ubi, vol_id, lnum); + if (IS_ERR(le)) +@@ -244,22 +221,18 @@ + */ + static void leb_read_unlock(struct ubi_device *ubi, int vol_id, int lnum) + { +- int free = 0; +- struct ltree_entry *le; ++ struct ubi_ltree_entry *le; + + spin_lock(&ubi->ltree_lock); + le = ltree_lookup(ubi, vol_id, lnum); + le->users -= 1; + ubi_assert(le->users >= 0); ++ up_read(&le->mutex); + if (le->users == 0) { + rb_erase(&le->rb, &ubi->ltree); +- free = 1; ++ kfree(le); + } + spin_unlock(&ubi->ltree_lock); +- +- up_read(&le->mutex); +- if (free) +- kmem_cache_free(ltree_slab, le); + } + + /** +@@ -273,7 +246,7 @@ + */ + static int leb_write_lock(struct ubi_device *ubi, int vol_id, int lnum) + { +- struct ltree_entry *le; ++ struct ubi_ltree_entry *le; + + le = ltree_add_entry(ubi, vol_id, lnum); + if (IS_ERR(le)) +@@ -283,6 +256,40 @@ + } + + /** ++ * leb_write_lock - lock logical 
eraseblock for writing. ++ * @ubi: UBI device description object ++ * @vol_id: volume ID ++ * @lnum: logical eraseblock number ++ * ++ * This function locks a logical eraseblock for writing if there is no ++ * contention and does nothing if there is contention. Returns %0 in case of ++ * success, %1 in case of contention, and a negative error code in case of ++ * failure. ++ */ ++static int leb_write_trylock(struct ubi_device *ubi, int vol_id, int lnum) ++{ ++ struct ubi_ltree_entry *le; ++ ++ le = ltree_add_entry(ubi, vol_id, lnum); ++ if (IS_ERR(le)) ++ return PTR_ERR(le); ++ if (down_write_trylock(&le->mutex)) ++ return 0; ++ ++ /* Contention, cancel */ ++ spin_lock(&ubi->ltree_lock); ++ le->users -= 1; ++ ubi_assert(le->users >= 0); ++ if (le->users == 0) { ++ rb_erase(&le->rb, &ubi->ltree); ++ kfree(le); ++ } ++ spin_unlock(&ubi->ltree_lock); ++ ++ return 1; ++} ++ ++/** + * leb_write_unlock - unlock logical eraseblock. + * @ubi: UBI device description object + * @vol_id: volume ID +@@ -290,39 +297,34 @@ + */ + static void leb_write_unlock(struct ubi_device *ubi, int vol_id, int lnum) + { +- int free; +- struct ltree_entry *le; ++ struct ubi_ltree_entry *le; + + spin_lock(&ubi->ltree_lock); + le = ltree_lookup(ubi, vol_id, lnum); + le->users -= 1; + ubi_assert(le->users >= 0); ++ up_write(&le->mutex); + if (le->users == 0) { + rb_erase(&le->rb, &ubi->ltree); +- free = 1; +- } else +- free = 0; ++ kfree(le); ++ } + spin_unlock(&ubi->ltree_lock); +- +- up_write(&le->mutex); +- if (free) +- kmem_cache_free(ltree_slab, le); + } + + /** + * ubi_eba_unmap_leb - un-map logical eraseblock. + * @ubi: UBI device description object +- * @vol_id: volume ID ++ * @vol: volume description object + * @lnum: logical eraseblock number + * + * This function un-maps logical eraseblock @lnum and schedules corresponding + * physical eraseblock for erasure. Returns zero in case of success and a + * negative error code in case of failure.
+ */ +-int ubi_eba_unmap_leb(struct ubi_device *ubi, int vol_id, int lnum) ++int ubi_eba_unmap_leb(struct ubi_device *ubi, struct ubi_volume *vol, ++ int lnum) + { +- int idx = vol_id2idx(ubi, vol_id), err, pnum; +- struct ubi_volume *vol = ubi->volumes[idx]; ++ int err, pnum, vol_id = vol->vol_id; + + if (ubi->ro_mode) + return -EROFS; +@@ -349,7 +351,7 @@ + /** + * ubi_eba_read_leb - read data. + * @ubi: UBI device description object +- * @vol_id: volume ID ++ * @vol: volume description object + * @lnum: logical eraseblock number + * @buf: buffer to store the read data + * @offset: offset from where to read +@@ -365,12 +367,11 @@ + * returned for any volume type if an ECC error was detected by the MTD device + * driver. Other negative error cored may be returned in case of other errors. + */ +-int ubi_eba_read_leb(struct ubi_device *ubi, int vol_id, int lnum, void *buf, +- int offset, int len, int check) ++int ubi_eba_read_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum, ++ void *buf, int offset, int len, int check) + { +- int err, pnum, scrub = 0, idx = vol_id2idx(ubi, vol_id); ++ int err, pnum, scrub = 0, vol_id = vol->vol_id; + struct ubi_vid_hdr *vid_hdr; +- struct ubi_volume *vol = ubi->volumes[idx]; + uint32_t uninitialized_var(crc); + + err = leb_read_lock(ubi, vol_id, lnum); +@@ -500,16 +501,12 @@ + struct ubi_vid_hdr *vid_hdr; + + vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS); +- if (!vid_hdr) { ++ if (!vid_hdr) + return -ENOMEM; +- } +- +- mutex_lock(&ubi->buf_mutex); + + retry: + new_pnum = ubi_wl_get_peb(ubi, UBI_UNKNOWN); + if (new_pnum < 0) { +- mutex_unlock(&ubi->buf_mutex); + ubi_free_vid_hdr(ubi, vid_hdr); + return new_pnum; + } +@@ -529,20 +526,23 @@ + goto write_error; + + data_size = offset + len; ++ mutex_lock(&ubi->buf_mutex); + memset(ubi->peb_buf1 + offset, 0xFF, len); + + /* Read everything before the area where the write failure happened */ + if (offset > 0) { + err = ubi_io_read_data(ubi, ubi->peb_buf1, pnum, 0, offset); + if 
(err && err != UBI_IO_BITFLIPS) +- goto out_put; ++ goto out_unlock; + } + + memcpy(ubi->peb_buf1 + offset, buf, len); + + err = ubi_io_write_data(ubi, ubi->peb_buf1, new_pnum, 0, data_size); +- if (err) ++ if (err) { ++ mutex_unlock(&ubi->buf_mutex); + goto write_error; ++ } + + mutex_unlock(&ubi->buf_mutex); + ubi_free_vid_hdr(ubi, vid_hdr); +@@ -553,8 +553,9 @@ + ubi_msg("data was successfully recovered"); + return 0; + +-out_put: ++out_unlock: + mutex_unlock(&ubi->buf_mutex); ++out_put: + ubi_wl_put_peb(ubi, new_pnum, 1); + ubi_free_vid_hdr(ubi, vid_hdr); + return err; +@@ -567,7 +568,6 @@ + ubi_warn("failed to write to PEB %d", new_pnum); + ubi_wl_put_peb(ubi, new_pnum, 1); + if (++tries > UBI_IO_RETRIES) { +- mutex_unlock(&ubi->buf_mutex); + ubi_free_vid_hdr(ubi, vid_hdr); + return err; + } +@@ -578,7 +578,7 @@ + /** + * ubi_eba_write_leb - write data to dynamic volume. + * @ubi: UBI device description object +- * @vol_id: volume ID ++ * @vol: volume description object + * @lnum: logical eraseblock number + * @buf: the data to write + * @offset: offset within the logical eraseblock where to write +@@ -586,15 +586,14 @@ + * @dtype: data type + * + * This function writes data to logical eraseblock @lnum of a dynamic volume +- * @vol_id. Returns zero in case of success and a negative error code in case ++ * @vol. Returns zero in case of success and a negative error code in case + * of failure. In case of error, it is possible that something was still + * written to the flash media, but may be some garbage. 
+ */ +-int ubi_eba_write_leb(struct ubi_device *ubi, int vol_id, int lnum, ++int ubi_eba_write_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum, + const void *buf, int offset, int len, int dtype) + { +- int idx = vol_id2idx(ubi, vol_id), err, pnum, tries = 0; +- struct ubi_volume *vol = ubi->volumes[idx]; ++ int err, pnum, tries = 0, vol_id = vol->vol_id; + struct ubi_vid_hdr *vid_hdr; + + if (ubi->ro_mode) +@@ -613,7 +612,8 @@ + if (err) { + ubi_warn("failed to write data to PEB %d", pnum); + if (err == -EIO && ubi->bad_allowed) +- err = recover_peb(ubi, pnum, vol_id, lnum, buf, offset, len); ++ err = recover_peb(ubi, pnum, vol_id, lnum, buf, ++ offset, len); + if (err) + ubi_ro_mode(ubi); + } +@@ -656,11 +656,14 @@ + goto write_error; + } + +- err = ubi_io_write_data(ubi, buf, pnum, offset, len); +- if (err) { +- ubi_warn("failed to write %d bytes at offset %d of LEB %d:%d, " +- "PEB %d", len, offset, vol_id, lnum, pnum); +- goto write_error; ++ if (len) { ++ err = ubi_io_write_data(ubi, buf, pnum, offset, len); ++ if (err) { ++ ubi_warn("failed to write %d bytes at offset %d of " ++ "LEB %d:%d, PEB %d", len, offset, vol_id, ++ lnum, pnum); ++ goto write_error; ++ } + } + + vol->eba_tbl[lnum] = pnum; +@@ -698,7 +701,7 @@ + /** + * ubi_eba_write_leb_st - write data to static volume. + * @ubi: UBI device description object +- * @vol_id: volume ID ++ * @vol: volume description object + * @lnum: logical eraseblock number + * @buf: data to write + * @len: how many bytes to write +@@ -706,7 +709,7 @@ + * @used_ebs: how many logical eraseblocks will this volume contain + * + * This function writes data to logical eraseblock @lnum of static volume +- * @vol_id. The @used_ebs argument should contain total number of logical ++ * @vol. The @used_ebs argument should contain total number of logical + * eraseblock in this static volume. + * + * When writing to the last logical eraseblock, the @len argument doesn't have +@@ -718,12 +721,11 @@ + * volumes. 
This function returns zero in case of success and a negative error + * code in case of failure. + */ +-int ubi_eba_write_leb_st(struct ubi_device *ubi, int vol_id, int lnum, +- const void *buf, int len, int dtype, int used_ebs) ++int ubi_eba_write_leb_st(struct ubi_device *ubi, struct ubi_volume *vol, ++ int lnum, const void *buf, int len, int dtype, ++ int used_ebs) + { +- int err, pnum, tries = 0, data_size = len; +- int idx = vol_id2idx(ubi, vol_id); +- struct ubi_volume *vol = ubi->volumes[idx]; ++ int err, pnum, tries = 0, data_size = len, vol_id = vol->vol_id; + struct ubi_vid_hdr *vid_hdr; + uint32_t crc; + +@@ -734,7 +736,7 @@ + /* If this is the last LEB @len may be unaligned */ + len = ALIGN(data_size, ubi->min_io_size); + else +- ubi_assert(len % ubi->min_io_size == 0); ++ ubi_assert(!(len & (ubi->min_io_size - 1))); + + vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS); + if (!vid_hdr) +@@ -819,7 +821,7 @@ + /* + * ubi_eba_atomic_leb_change - change logical eraseblock atomically. + * @ubi: UBI device description object +- * @vol_id: volume ID ++ * @vol: volume description object + * @lnum: logical eraseblock number + * @buf: data to write + * @len: how many bytes to write +@@ -834,17 +836,27 @@ + * UBI reserves one LEB for the "atomic LEB change" operation, so only one + * LEB change may be done at a time. This is ensured by @ubi->alc_mutex. + */ +-int ubi_eba_atomic_leb_change(struct ubi_device *ubi, int vol_id, int lnum, +- const void *buf, int len, int dtype) ++int ubi_eba_atomic_leb_change(struct ubi_device *ubi, struct ubi_volume *vol, ++ int lnum, const void *buf, int len, int dtype) + { +- int err, pnum, tries = 0, idx = vol_id2idx(ubi, vol_id); +- struct ubi_volume *vol = ubi->volumes[idx]; ++ int err, pnum, tries = 0, vol_id = vol->vol_id; + struct ubi_vid_hdr *vid_hdr; + uint32_t crc; + + if (ubi->ro_mode) + return -EROFS; + ++ if (len == 0) { ++ /* ++ * Special case when data length is zero. 
In this case the LEB ++ * has to be unmapped and mapped somewhere else. ++ */ ++ err = ubi_eba_unmap_leb(ubi, vol, lnum); ++ if (err) ++ return err; ++ return ubi_eba_write_leb(ubi, vol, lnum, NULL, 0, 0, dtype); ++ } ++ + vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS); + if (!vid_hdr) + return -ENOMEM; +@@ -891,7 +903,7 @@ + } + + if (vol->eba_tbl[lnum] >= 0) { +- err = ubi_wl_put_peb(ubi, vol->eba_tbl[lnum], 1); ++ err = ubi_wl_put_peb(ubi, vol->eba_tbl[lnum], 0); + if (err) + goto out_leb_unlock; + } +@@ -928,20 +940,6 @@ + } + + /** +- * ltree_entry_ctor - lock tree entries slab cache constructor. +- * @obj: the lock-tree entry to construct +- * @cache: the lock tree entry slab cache +- * @flags: constructor flags +- */ +-static void ltree_entry_ctor(struct kmem_cache *cache, void *obj) +-{ +- struct ltree_entry *le = obj; +- +- le->users = 0; +- init_rwsem(&le->mutex); +-} +- +-/** + * ubi_eba_copy_leb - copy logical eraseblock. + * @ubi: UBI device description object + * @from: physical eraseblock number from where to copy +@@ -950,14 +948,20 @@ + * + * This function copies logical eraseblock from physical eraseblock @from to + * physical eraseblock @to. The @vid_hdr buffer may be changed by this +- * function. Returns zero in case of success, %UBI_IO_BITFLIPS if the operation +- * was canceled because bit-flips were detected at the target PEB, and a +- * negative error code in case of failure. ++ * function. Returns: ++ * o %0 in case of success; ++ * o %1 if the operation was canceled because the volume is being deleted ++ * or because the PEB was put meanwhile; ++ * o %2 if the operation was canceled because there was a write error to the ++ * target PEB; ++ * o %-EAGAIN if the operation was canceled because a bit-flip was detected ++ * in the target PEB; ++ * o a negative error code in case of failure. 
+ */ + int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, + struct ubi_vid_hdr *vid_hdr) + { +- int err, vol_id, lnum, data_size, aldata_size, pnum, idx; ++ int err, vol_id, lnum, data_size, aldata_size, idx; + struct ubi_volume *vol; + uint32_t crc; + +@@ -973,51 +977,67 @@ + data_size = aldata_size = + ubi->leb_size - be32_to_cpu(vid_hdr->data_pad); + +- /* +- * We do not want anybody to write to this logical eraseblock while we +- * are moving it, so we lock it. +- */ +- err = leb_write_lock(ubi, vol_id, lnum); +- if (err) +- return err; +- +- mutex_lock(&ubi->buf_mutex); +- +- /* +- * But the logical eraseblock might have been put by this time. +- * Cancel if it is true. +- */ + idx = vol_id2idx(ubi, vol_id); +- ++ spin_lock(&ubi->volumes_lock); + /* +- * We may race with volume deletion/re-size, so we have to hold +- * @ubi->volumes_lock. ++ * Note, we may race with volume deletion, which means that the volume ++ * this logical eraseblock belongs to might be being deleted. Since the ++ * volume deletion un-maps all the volume's logical eraseblocks, it will ++ * be locked in 'ubi_wl_put_peb()' and wait for the WL worker to finish. + */ +- spin_lock(&ubi->volumes_lock); + vol = ubi->volumes[idx]; + if (!vol) { +- dbg_eba("volume %d was removed meanwhile", vol_id); ++ /* No need to do further work, cancel */ ++ dbg_eba("volume %d is being removed, cancel", vol_id); + spin_unlock(&ubi->volumes_lock); +- goto out_unlock; ++ return 1; + } ++ spin_unlock(&ubi->volumes_lock); + +- pnum = vol->eba_tbl[lnum]; +- if (pnum != from) { +- dbg_eba("LEB %d:%d is no longer mapped to PEB %d, mapped to " +- "PEB %d, cancel", vol_id, lnum, from, pnum); +- spin_unlock(&ubi->volumes_lock); +- goto out_unlock; ++ /* ++ * We do not want anybody to write to this logical eraseblock while we ++ * are moving it, so lock it. ++ * ++ * Note, we are using non-waiting locking here, because we cannot sleep ++ * on the LEB, since it may cause deadlocks. 
Indeed, imagine a task is ++ * unmapping the LEB which is mapped to the PEB we are going to move ++ * (@from). This task locks the LEB and goes to sleep in the ++ * 'ubi_wl_put_peb()' function on the @ubi->move_mutex. In turn, we are ++ * holding @ubi->move_mutex and go to sleep on the LEB lock. So, if the ++ * LEB is already locked, we just do not move it and return %1. ++ */ ++ err = leb_write_trylock(ubi, vol_id, lnum); ++ if (err) { ++ dbg_eba("contention on LEB %d:%d, cancel", vol_id, lnum); ++ return err; + } +- spin_unlock(&ubi->volumes_lock); + +- /* OK, now the LEB is locked and we can safely start moving it */ ++ /* ++ * The LEB might have been put meanwhile, and the task which put it is ++ * probably waiting on @ubi->move_mutex. No need to continue the work, ++ * cancel it. ++ */ ++ if (vol->eba_tbl[lnum] != from) { ++ dbg_eba("LEB %d:%d is no longer mapped to PEB %d, mapped to " ++ "PEB %d, cancel", vol_id, lnum, from, ++ vol->eba_tbl[lnum]); ++ err = 1; ++ goto out_unlock_leb; ++ } + ++ /* ++ * OK, now the LEB is locked and we can safely start moving it. Since ++ * this function utilizes the @ubi->peb_buf1 buffer which is shared ++ * with some other functions, lock the buffer by taking the ++ * @ubi->buf_mutex.
++ */ ++ mutex_lock(&ubi->buf_mutex); + dbg_eba("read %d bytes of data", aldata_size); + err = ubi_io_read_data(ubi, ubi->peb_buf1, from, 0, aldata_size); + if (err && err != UBI_IO_BITFLIPS) { + ubi_warn("error %d while reading data from PEB %d", + err, from); +- goto out_unlock; ++ goto out_unlock_buf; + } + + /* +@@ -1052,8 +1072,11 @@ + vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi)); + + err = ubi_io_write_vid_hdr(ubi, to, vid_hdr); +- if (err) +- goto out_unlock; ++ if (err) { ++ if (err == -EIO) ++ err = 2; ++ goto out_unlock_buf; ++ } + + cond_resched(); + +@@ -1062,13 +1085,18 @@ + if (err) { + if (err != UBI_IO_BITFLIPS) + ubi_warn("cannot read VID header back from PEB %d", to); +- goto out_unlock; ++ else ++ err = -EAGAIN; ++ goto out_unlock_buf; + } + + if (data_size > 0) { + err = ubi_io_write_data(ubi, ubi->peb_buf1, to, 0, aldata_size); +- if (err) +- goto out_unlock; ++ if (err) { ++ if (err == -EIO) ++ err = 2; ++ goto out_unlock_buf; ++ } + + cond_resched(); + +@@ -1082,29 +1110,33 @@ + if (err != UBI_IO_BITFLIPS) + ubi_warn("cannot read data back from PEB %d", + to); +- goto out_unlock; ++ else ++ err = -EAGAIN; ++ goto out_unlock_buf; + } + + cond_resched(); + + if (memcmp(ubi->peb_buf1, ubi->peb_buf2, aldata_size)) { +- ubi_warn("read data back from PEB %d - it is different", +- to); +- goto out_unlock; ++ ubi_warn("read data back from PEB %d and it is " ++ "different", to); ++ err = -EINVAL; ++ goto out_unlock_buf; + } + } + + ubi_assert(vol->eba_tbl[lnum] == from); + vol->eba_tbl[lnum] = to; + +-out_unlock: ++out_unlock_buf: + mutex_unlock(&ubi->buf_mutex); ++out_unlock_leb: + leb_write_unlock(ubi, vol_id, lnum); + return err; + } + + /** +- * ubi_eba_init_scan - initialize the EBA unit using scanning information. ++ * ubi_eba_init_scan - initialize the EBA sub-system using scanning information. 
+ * @ubi: UBI device description object + * @si: scanning information + * +@@ -1119,20 +1151,12 @@ + struct ubi_scan_leb *seb; + struct rb_node *rb; + +- dbg_eba("initialize EBA unit"); ++ dbg_eba("initialize EBA sub-system"); + + spin_lock_init(&ubi->ltree_lock); + mutex_init(&ubi->alc_mutex); + ubi->ltree = RB_ROOT; + +- if (ubi_devices_cnt == 0) { +- ltree_slab = kmem_cache_create("ubi_ltree_slab", +- sizeof(struct ltree_entry), 0, +- 0, &ltree_entry_ctor); +- if (!ltree_slab) +- return -ENOMEM; +- } +- + ubi->global_sqnum = si->max_sqnum + 1; + num_volumes = ubi->vtbl_slots + UBI_INT_VOL_COUNT; + +@@ -1168,6 +1192,15 @@ + } + } + ++ if (ubi->avail_pebs < EBA_RESERVED_PEBS) { ++ ubi_err("no enough physical eraseblocks (%d, need %d)", ++ ubi->avail_pebs, EBA_RESERVED_PEBS); ++ err = -ENOSPC; ++ goto out_free; ++ } ++ ubi->avail_pebs -= EBA_RESERVED_PEBS; ++ ubi->rsvd_pebs += EBA_RESERVED_PEBS; ++ + if (ubi->bad_allowed) { + ubi_calculate_reserved(ubi); + +@@ -1184,16 +1217,7 @@ + ubi->rsvd_pebs += ubi->beb_rsvd_pebs; + } + +- if (ubi->avail_pebs < EBA_RESERVED_PEBS) { +- ubi_err("no enough physical eraseblocks (%d, need %d)", +- ubi->avail_pebs, EBA_RESERVED_PEBS); +- err = -ENOSPC; +- goto out_free; +- } +- ubi->avail_pebs -= EBA_RESERVED_PEBS; +- ubi->rsvd_pebs += EBA_RESERVED_PEBS; +- +- dbg_eba("EBA unit is initialized"); ++ dbg_eba("EBA sub-system is initialized"); + return 0; + + out_free: +@@ -1202,26 +1226,5 @@ + continue; + kfree(ubi->volumes[i]->eba_tbl); + } +- if (ubi_devices_cnt == 0) +- kmem_cache_destroy(ltree_slab); + return err; + } +- +-/** +- * ubi_eba_close - close EBA unit.
+- * @ubi: UBI device description object +- */ +-void ubi_eba_close(const struct ubi_device *ubi) +-{ +- int i, num_volumes = ubi->vtbl_slots + UBI_INT_VOL_COUNT; +- +- dbg_eba("close EBA unit"); +- +- for (i = 0; i < num_volumes; i++) { +- if (!ubi->volumes[i]) +- continue; +- kfree(ubi->volumes[i]->eba_tbl); +- } +- if (ubi_devices_cnt == 1) +- kmem_cache_destroy(ltree_slab); +-} +diff -Nurd linux-2.6.24/drivers/mtd/ubi/gluebi.c ubifs-v2.6.24/drivers/mtd/ubi/gluebi.c +--- linux-2.6.24/drivers/mtd/ubi/gluebi.c 2008-01-25 00:58:37.000000000 +0200 ++++ ubifs-v2.6.24/drivers/mtd/ubi/gluebi.c 2009-04-07 17:14:47.000000000 +0200 +@@ -28,7 +28,7 @@ + * eraseblock size is equivalent to the logical eraseblock size of the volume. + */ + +-#include <asm/div64.h> ++#include <linux/math64.h> + #include "ubi.h" + + /** +@@ -109,9 +109,8 @@ + int err = 0, lnum, offs, total_read; + struct ubi_volume *vol; + struct ubi_device *ubi; +- uint64_t tmp = from; + +- dbg_msg("read %zd bytes from offset %lld", len, from); ++ dbg_gen("read %zd bytes from offset %lld", len, from); + + if (len < 0 || from < 0 || from + len > mtd->size) + return -EINVAL; +@@ -119,9 +118,7 @@ + vol = container_of(mtd, struct ubi_volume, gluebi_mtd); + ubi = vol->ubi; + +- offs = do_div(tmp, mtd->erasesize); +- lnum = tmp; +- ++ lnum = div_u64_rem(from, mtd->erasesize, &offs); + total_read = len; + while (total_read) { + size_t to_read = mtd->erasesize - offs; +@@ -129,8 +126,7 @@ + if (to_read > total_read) + to_read = total_read; + +- err = ubi_eba_read_leb(ubi, vol->vol_id, lnum, buf, offs, +- to_read, 0); ++ err = ubi_eba_read_leb(ubi, vol, lnum, buf, offs, to_read, 0); + if (err) + break; + +@@ -161,9 +157,8 @@ + int err = 0, lnum, offs, total_written; + struct ubi_volume *vol; + struct ubi_device *ubi; +- uint64_t tmp = to; + +- dbg_msg("write %zd bytes to offset %lld", len, to); ++ dbg_gen("write %zd bytes to offset %lld", len, to); + + if (len < 0 || to < 0 || len + to > mtd->size) + return -EINVAL; 
+@@ -174,8 +169,7 @@ + if (ubi->ro_mode) + return -EROFS; + +- offs = do_div(tmp, mtd->erasesize); +- lnum = tmp; ++ lnum = div_u64_rem(to, mtd->erasesize, &offs); + + if (len % mtd->writesize || offs % mtd->writesize) + return -EINVAL; +@@ -187,8 +181,8 @@ + if (to_write > total_written) + to_write = total_written; + +- err = ubi_eba_write_leb(ubi, vol->vol_id, lnum, buf, offs, +- to_write, UBI_UNKNOWN); ++ err = ubi_eba_write_leb(ubi, vol, lnum, buf, offs, to_write, ++ UBI_UNKNOWN); + if (err) + break; + +@@ -216,7 +210,7 @@ + struct ubi_volume *vol; + struct ubi_device *ubi; + +- dbg_msg("erase %u bytes at offset %u", instr->len, instr->addr); ++ dbg_gen("erase %u bytes at offset %u", instr->len, instr->addr); + + if (instr->addr < 0 || instr->addr > mtd->size - mtd->erasesize) + return -EINVAL; +@@ -237,7 +231,7 @@ + return -EROFS; + + for (i = 0; i < count; i++) { +- err = ubi_eba_unmap_leb(ubi, vol->vol_id, lnum + i); ++ err = ubi_eba_unmap_leb(ubi, vol, lnum + i); + if (err) + goto out_err; + } +@@ -250,8 +244,8 @@ + if (err) + goto out_err; + +- instr->state = MTD_ERASE_DONE; +- mtd_erase_callback(instr); ++ instr->state = MTD_ERASE_DONE; ++ mtd_erase_callback(instr); + return 0; + + out_err: +@@ -292,19 +286,20 @@ + /* + * In case of dynamic volume, MTD device size is just volume size. In + * case of a static volume the size is equivalent to the amount of data +- * bytes, which is zero at this moment and will be changed after volume +- * update. ++ * bytes. 
+ */ + if (vol->vol_type == UBI_DYNAMIC_VOLUME) + mtd->size = vol->usable_leb_size * vol->reserved_pebs; ++ else ++ mtd->size = vol->used_bytes; + + if (add_mtd_device(mtd)) { +- ubi_err("cannot not add MTD device\n"); ++ ubi_err("cannot not add MTD device"); + kfree(mtd->name); + return -ENFILE; + } + +- dbg_msg("added mtd%d (\"%s\"), size %u, EB size %u", ++ dbg_gen("added mtd%d (\"%s\"), size %u, EB size %u", + mtd->index, mtd->name, mtd->size, mtd->erasesize); + return 0; + } +@@ -322,7 +317,7 @@ + int err; + struct mtd_info *mtd = &vol->gluebi_mtd; + +- dbg_msg("remove mtd%d", mtd->index); ++ dbg_gen("remove mtd%d", mtd->index); + err = del_mtd_device(mtd); + if (err) + return err; +diff -Nurd linux-2.6.24/drivers/mtd/ubi/io.c ubifs-v2.6.24/drivers/mtd/ubi/io.c +--- linux-2.6.24/drivers/mtd/ubi/io.c 2008-01-25 00:58:37.000000000 +0200 ++++ ubifs-v2.6.24/drivers/mtd/ubi/io.c 2009-04-07 17:14:47.000000000 +0200 +@@ -20,15 +20,15 @@ + */ + + /* +- * UBI input/output unit. ++ * UBI input/output sub-system. + * +- * This unit provides a uniform way to work with all kinds of the underlying +- * MTD devices. It also implements handy functions for reading and writing UBI +- * headers. ++ * This sub-system provides a uniform way to work with all kinds of the ++ * underlying MTD devices. It also implements handy functions for reading and ++ * writing UBI headers. + * + * We are trying to have a paranoid mindset and not to trust to what we read +- * from the flash media in order to be more secure and robust. So this unit +- * validates every single header it reads from the flash media. ++ * from the flash media in order to be more secure and robust. So this ++ * sub-system validates every single header it reads from the flash media. + * + * Some words about how the eraseblock headers are stored. + * +@@ -79,11 +79,11 @@ + * 512-byte chunks, we have to allocate one more buffer and copy our VID header + * to offset 448 of this buffer. 
+ * +- * The I/O unit does the following trick in order to avoid this extra copy. +- * It always allocates a @ubi->vid_hdr_alsize bytes buffer for the VID header +- * and returns a pointer to offset @ubi->vid_hdr_shift of this buffer. When the +- * VID header is being written out, it shifts the VID header pointer back and +- * writes the whole sub-page. ++ * The I/O sub-system does the following trick in order to avoid this extra ++ * copy. It always allocates a @ubi->vid_hdr_alsize bytes buffer for the VID ++ * header and returns a pointer to offset @ubi->vid_hdr_shift of this buffer. ++ * When the VID header is being written out, it shifts the VID header pointer ++ * back and writes the whole sub-page. + */ + + #include <linux/crc32.h> +@@ -156,15 +156,19 @@ + /* + * -EUCLEAN is reported if there was a bit-flip which + * was corrected, so this is harmless. ++ * ++ * We do not report about it here unless debugging is ++ * enabled. A corresponding message will be printed ++ * later, when it is has been scrubbed. + */ +- ubi_msg("fixable bit-flip detected at PEB %d", pnum); ++ dbg_msg("fixable bit-flip detected at PEB %d", pnum); + ubi_assert(len == read); + return UBI_IO_BITFLIPS; + } + + if (read != len && retries++ < UBI_IO_RETRIES) { +- dbg_io("error %d while reading %d bytes from PEB %d:%d, " +- "read only %zd bytes, retry", ++ dbg_io("error %d while reading %d bytes from PEB %d:%d," ++ " read only %zd bytes, retry", + err, len, pnum, offset, read); + yield(); + goto retry; +@@ -173,11 +177,21 @@ + ubi_err("error %d while reading %d bytes from PEB %d:%d, " + "read %zd bytes", err, len, pnum, offset, read); + ubi_dbg_dump_stack(); ++ ++ /* ++ * The driver should never return -EBADMSG if it failed to read ++ * all the requested data. But some buggy drivers might do ++ * this, so we change it to -EIO. 
++ */ ++ if (read != len && err == -EBADMSG) { ++ ubi_assert(0); ++ err = -EIO; ++ } + } else { + ubi_assert(len == read); + + if (ubi_dbg_is_bitflip()) { +- dbg_msg("bit-flip (emulated)"); ++ dbg_gen("bit-flip (emulated)"); + err = UBI_IO_BITFLIPS; + } + } +@@ -381,6 +395,7 @@ + { + int err, i, patt_count; + ++ ubi_msg("run torture test for PEB %d", pnum); + patt_count = ARRAY_SIZE(patterns); + ubi_assert(patt_count > 0); + +@@ -424,6 +439,7 @@ + } + + err = patt_count; ++ ubi_msg("PEB %d passed torture test, do not mark it a bad", pnum); + + out: + mutex_unlock(&ubi->buf_mutex); +@@ -667,6 +683,9 @@ + if (verbose) + ubi_warn("no EC header found at PEB %d, " + "only 0xFF bytes", pnum); ++ else if (UBI_IO_DEBUG) ++ dbg_msg("no EC header found at PEB %d, " ++ "only 0xFF bytes", pnum); + return UBI_IO_PEB_EMPTY; + } + +@@ -678,7 +697,9 @@ + ubi_warn("bad magic number at PEB %d: %08x instead of " + "%08x", pnum, magic, UBI_EC_HDR_MAGIC); + ubi_dbg_dump_ec_hdr(ec_hdr); +- } ++ } else if (UBI_IO_DEBUG) ++ dbg_msg("bad magic number at PEB %d: %08x instead of " ++ "%08x", pnum, magic, UBI_EC_HDR_MAGIC); + return UBI_IO_BAD_EC_HDR; + } + +@@ -687,10 +708,12 @@ + + if (hdr_crc != crc) { + if (verbose) { +- ubi_warn("bad EC header CRC at PEB %d, calculated %#08x," +- " read %#08x", pnum, crc, hdr_crc); ++ ubi_warn("bad EC header CRC at PEB %d, calculated " ++ "%#08x, read %#08x", pnum, crc, hdr_crc); + ubi_dbg_dump_ec_hdr(ec_hdr); +- } ++ } else if (UBI_IO_DEBUG) ++ dbg_msg("bad EC header CRC at PEB %d, calculated " ++ "%#08x, read %#08x", pnum, crc, hdr_crc); + return UBI_IO_BAD_EC_HDR; + } + +@@ -940,6 +963,9 @@ + if (verbose) + ubi_warn("no VID header found at PEB %d, " + "only 0xFF bytes", pnum); ++ else if (UBI_IO_DEBUG) ++ dbg_msg("no VID header found at PEB %d, " ++ "only 0xFF bytes", pnum); + return UBI_IO_PEB_FREE; + } + +@@ -951,7 +977,9 @@ + ubi_warn("bad magic number at PEB %d: %08x instead of " + "%08x", pnum, magic, UBI_VID_HDR_MAGIC); + 
ubi_dbg_dump_vid_hdr(vid_hdr); +- } ++ } else if (UBI_IO_DEBUG) ++ dbg_msg("bad magic number at PEB %d: %08x instead of " ++ "%08x", pnum, magic, UBI_VID_HDR_MAGIC); + return UBI_IO_BAD_VID_HDR; + } + +@@ -963,7 +991,9 @@ + ubi_warn("bad CRC at PEB %d, calculated %#08x, " + "read %#08x", pnum, crc, hdr_crc); + ubi_dbg_dump_vid_hdr(vid_hdr); +- } ++ } else if (UBI_IO_DEBUG) ++ dbg_msg("bad CRC at PEB %d, calculated %#08x, " ++ "read %#08x", pnum, crc, hdr_crc); + return UBI_IO_BAD_VID_HDR; + } + +@@ -1004,7 +1034,7 @@ + + err = paranoid_check_peb_ec_hdr(ubi, pnum); + if (err) +- return err > 0 ? -EINVAL: err; ++ return err > 0 ? -EINVAL : err; + + vid_hdr->magic = cpu_to_be32(UBI_VID_HDR_MAGIC); + vid_hdr->version = UBI_VERSION; +@@ -1081,8 +1111,7 @@ + } + + /** +- * paranoid_check_peb_ec_hdr - check that the erase counter header of a +- * physical eraseblock is in-place and is all right. ++ * paranoid_check_peb_ec_hdr - check erase counter header. + * @ubi: UBI device description object + * @pnum: the physical eraseblock number to check + * +@@ -1160,8 +1189,7 @@ + } + + /** +- * paranoid_check_peb_vid_hdr - check that the volume identifier header of a +- * physical eraseblock is in-place and is all right. ++ * paranoid_check_peb_vid_hdr - check volume identifier header. 
+ * @ubi: UBI device description object + * @pnum: the physical eraseblock number to check + * +@@ -1242,7 +1270,7 @@ + + fail: + ubi_err("paranoid check failed for PEB %d", pnum); +- dbg_msg("hex dump of the %d-%d region", offset, offset + len); ++ ubi_msg("hex dump of the %d-%d region", offset, offset + len); + print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, + ubi->dbg_peb_buf, len, 1); + err = 1; +diff -Nurd linux-2.6.24/drivers/mtd/ubi/kapi.c ubifs-v2.6.24/drivers/mtd/ubi/kapi.c +--- linux-2.6.24/drivers/mtd/ubi/kapi.c 2008-01-25 00:58:37.000000000 +0200 ++++ ubifs-v2.6.24/drivers/mtd/ubi/kapi.c 2009-04-07 17:14:47.000000000 +0200 +@@ -30,23 +30,27 @@ + * @ubi_num: UBI device number + * @di: the information is stored here + * +- * This function returns %0 in case of success and a %-ENODEV if there is no +- * such UBI device. ++ * This function returns %0 in case of success, %-EINVAL if the UBI device ++ * number is invalid, and %-ENODEV if there is no such UBI device. + */ + int ubi_get_device_info(int ubi_num, struct ubi_device_info *di) + { +- const struct ubi_device *ubi; ++ struct ubi_device *ubi; + +- if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES || +- !ubi_devices[ubi_num]) ++ if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES) ++ return -EINVAL; ++ ++ ubi = ubi_get_device(ubi_num); ++ if (!ubi) + return -ENODEV; + +- ubi = ubi_devices[ubi_num]; + di->ubi_num = ubi->ubi_num; + di->leb_size = ubi->leb_size; + di->min_io_size = ubi->min_io_size; + di->ro_mode = ubi->ro_mode; +- di->cdev = MKDEV(ubi->major, 0); ++ di->cdev = ubi->cdev.dev; ++ ++ ubi_put_device(ubi); + return 0; + } + EXPORT_SYMBOL_GPL(ubi_get_device_info); +@@ -73,7 +77,7 @@ + vi->usable_leb_size = vol->usable_leb_size; + vi->name_len = vol->name_len; + vi->name = vol->name; +- vi->cdev = MKDEV(ubi->major, vi->vol_id + 1); ++ vi->cdev = vol->cdev.dev; + } + EXPORT_SYMBOL_GPL(ubi_get_volume_info); + +@@ -102,39 +106,41 @@ + struct ubi_device *ubi; + struct ubi_volume *vol; + +- 
dbg_msg("open device %d volume %d, mode %d", ubi_num, vol_id, mode); +- +- err = -ENODEV; +- if (ubi_num < 0) +- return ERR_PTR(err); +- +- ubi = ubi_devices[ubi_num]; +- +- if (!try_module_get(THIS_MODULE)) +- return ERR_PTR(err); ++ dbg_gen("open device %d volume %d, mode %d", ubi_num, vol_id, mode); + +- if (ubi_num >= UBI_MAX_DEVICES || !ubi) +- goto out_put; ++ if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES) ++ return ERR_PTR(-EINVAL); + +- err = -EINVAL; +- if (vol_id < 0 || vol_id >= ubi->vtbl_slots) +- goto out_put; + if (mode != UBI_READONLY && mode != UBI_READWRITE && + mode != UBI_EXCLUSIVE) +- goto out_put; ++ return ERR_PTR(-EINVAL); ++ ++ /* ++ * First of all, we have to get the UBI device to prevent its removal. ++ */ ++ ubi = ubi_get_device(ubi_num); ++ if (!ubi) ++ return ERR_PTR(-ENODEV); ++ ++ if (vol_id < 0 || vol_id >= ubi->vtbl_slots) { ++ err = -EINVAL; ++ goto out_put_ubi; ++ } + + desc = kmalloc(sizeof(struct ubi_volume_desc), GFP_KERNEL); + if (!desc) { + err = -ENOMEM; +- goto out_put; ++ goto out_put_ubi; + } + ++ err = -ENODEV; ++ if (!try_module_get(THIS_MODULE)) ++ goto out_free; ++ + spin_lock(&ubi->volumes_lock); + vol = ubi->volumes[vol_id]; +- if (!vol) { +- err = -ENODEV; ++ if (!vol) + goto out_unlock; +- } + + err = -EBUSY; + switch (mode) { +@@ -156,21 +162,19 @@ + vol->exclusive = 1; + break; + } ++ get_device(&vol->dev); ++ vol->ref_count += 1; + spin_unlock(&ubi->volumes_lock); + + desc->vol = vol; + desc->mode = mode; + +- /* +- * To prevent simultaneous checks of the same volume we use @vtbl_mutex, +- * although it is not the purpose it was introduced for. 
+- */ +- mutex_lock(&ubi->vtbl_mutex); ++ mutex_lock(&ubi->ckvol_mutex); + if (!vol->checked) { + /* This is the first open - check the volume */ + err = ubi_check_volume(ubi, vol_id); + if (err < 0) { +- mutex_unlock(&ubi->vtbl_mutex); ++ mutex_unlock(&ubi->ckvol_mutex); + ubi_close_volume(desc); + return ERR_PTR(err); + } +@@ -181,14 +185,17 @@ + } + vol->checked = 1; + } +- mutex_unlock(&ubi->vtbl_mutex); ++ mutex_unlock(&ubi->ckvol_mutex); ++ + return desc; + + out_unlock: + spin_unlock(&ubi->volumes_lock); +- kfree(desc); +-out_put: + module_put(THIS_MODULE); ++out_free: ++ kfree(desc); ++out_put_ubi: ++ ubi_put_device(ubi); + return ERR_PTR(err); + } + EXPORT_SYMBOL_GPL(ubi_open_volume); +@@ -205,10 +212,10 @@ + int mode) + { + int i, vol_id = -1, len; +- struct ubi_volume_desc *ret; + struct ubi_device *ubi; ++ struct ubi_volume_desc *ret; + +- dbg_msg("open volume %s, mode %d", name, mode); ++ dbg_gen("open volume %s, mode %d", name, mode); + + if (!name) + return ERR_PTR(-EINVAL); +@@ -217,14 +224,12 @@ + if (len > UBI_VOL_NAME_MAX) + return ERR_PTR(-EINVAL); + +- ret = ERR_PTR(-ENODEV); +- if (!try_module_get(THIS_MODULE)) +- return ret; +- +- if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES || !ubi_devices[ubi_num]) +- goto out_put; ++ if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES) ++ return ERR_PTR(-EINVAL); + +- ubi = ubi_devices[ubi_num]; ++ ubi = ubi_get_device(ubi_num); ++ if (!ubi) ++ return ERR_PTR(-ENODEV); + + spin_lock(&ubi->volumes_lock); + /* Walk all volumes of this UBI device */ +@@ -238,13 +243,16 @@ + } + spin_unlock(&ubi->volumes_lock); + +- if (vol_id < 0) +- goto out_put; +- +- ret = ubi_open_volume(ubi_num, vol_id, mode); ++ if (vol_id >= 0) ++ ret = ubi_open_volume(ubi_num, vol_id, mode); ++ else ++ ret = ERR_PTR(-ENODEV); + +-out_put: +- module_put(THIS_MODULE); ++ /* ++ * We should put the UBI device even in case of success, because ++ * 'ubi_open_volume()' took a reference as well. 
++ */ ++ ubi_put_device(ubi); + return ret; + } + EXPORT_SYMBOL_GPL(ubi_open_volume_nm); +@@ -256,10 +264,11 @@ + void ubi_close_volume(struct ubi_volume_desc *desc) + { + struct ubi_volume *vol = desc->vol; ++ struct ubi_device *ubi = vol->ubi; + +- dbg_msg("close volume %d, mode %d", vol->vol_id, desc->mode); ++ dbg_gen("close volume %d, mode %d", vol->vol_id, desc->mode); + +- spin_lock(&vol->ubi->volumes_lock); ++ spin_lock(&ubi->volumes_lock); + switch (desc->mode) { + case UBI_READONLY: + vol->readers -= 1; +@@ -270,9 +279,12 @@ + case UBI_EXCLUSIVE: + vol->exclusive = 0; + } +- spin_unlock(&vol->ubi->volumes_lock); ++ vol->ref_count -= 1; ++ spin_unlock(&ubi->volumes_lock); + + kfree(desc); ++ put_device(&vol->dev); ++ ubi_put_device(ubi); + module_put(THIS_MODULE); + } + EXPORT_SYMBOL_GPL(ubi_close_volume); +@@ -311,7 +323,7 @@ + struct ubi_device *ubi = vol->ubi; + int err, vol_id = vol->vol_id; + +- dbg_msg("read %d bytes from LEB %d:%d:%d", len, vol_id, lnum, offset); ++ dbg_gen("read %d bytes from LEB %d:%d:%d", len, vol_id, lnum, offset); + + if (vol_id < 0 || vol_id >= ubi->vtbl_slots || lnum < 0 || + lnum >= vol->used_ebs || offset < 0 || len < 0 || +@@ -332,7 +344,7 @@ + if (len == 0) + return 0; + +- err = ubi_eba_read_leb(ubi, vol_id, lnum, buf, offset, len, check); ++ err = ubi_eba_read_leb(ubi, vol, lnum, buf, offset, len, check); + if (err && err == -EBADMSG && vol->vol_type == UBI_STATIC_VOLUME) { + ubi_warn("mark volume %d as corrupted", vol_id); + vol->corrupted = 1; +@@ -376,7 +388,7 @@ + struct ubi_device *ubi = vol->ubi; + int vol_id = vol->vol_id; + +- dbg_msg("write %d bytes to LEB %d:%d:%d", len, vol_id, lnum, offset); ++ dbg_gen("write %d bytes to LEB %d:%d:%d", len, vol_id, lnum, offset); + + if (vol_id < 0 || vol_id >= ubi->vtbl_slots) + return -EINVAL; +@@ -385,8 +397,8 @@ + return -EROFS; + + if (lnum < 0 || lnum >= vol->reserved_pebs || offset < 0 || len < 0 || +- offset + len > vol->usable_leb_size || offset % ubi->min_io_size 
|| +- len % ubi->min_io_size) ++ offset + len > vol->usable_leb_size || ++ offset & (ubi->min_io_size - 1) || len & (ubi->min_io_size - 1)) + return -EINVAL; + + if (dtype != UBI_LONGTERM && dtype != UBI_SHORTTERM && +@@ -399,7 +411,7 @@ + if (len == 0) + return 0; + +- return ubi_eba_write_leb(ubi, vol_id, lnum, buf, offset, len, dtype); ++ return ubi_eba_write_leb(ubi, vol, lnum, buf, offset, len, dtype); + } + EXPORT_SYMBOL_GPL(ubi_leb_write); + +@@ -426,7 +438,7 @@ + struct ubi_device *ubi = vol->ubi; + int vol_id = vol->vol_id; + +- dbg_msg("atomically write %d bytes to LEB %d:%d", len, vol_id, lnum); ++ dbg_gen("atomically write %d bytes to LEB %d:%d", len, vol_id, lnum); + + if (vol_id < 0 || vol_id >= ubi->vtbl_slots) + return -EINVAL; +@@ -435,7 +447,7 @@ + return -EROFS; + + if (lnum < 0 || lnum >= vol->reserved_pebs || len < 0 || +- len > vol->usable_leb_size || len % ubi->min_io_size) ++ len > vol->usable_leb_size || len & (ubi->min_io_size - 1)) + return -EINVAL; + + if (dtype != UBI_LONGTERM && dtype != UBI_SHORTTERM && +@@ -448,7 +460,7 @@ + if (len == 0) + return 0; + +- return ubi_eba_atomic_leb_change(ubi, vol_id, lnum, buf, len, dtype); ++ return ubi_eba_atomic_leb_change(ubi, vol, lnum, buf, len, dtype); + } + EXPORT_SYMBOL_GPL(ubi_leb_change); + +@@ -468,9 +480,9 @@ + { + struct ubi_volume *vol = desc->vol; + struct ubi_device *ubi = vol->ubi; +- int err, vol_id = vol->vol_id; ++ int err; + +- dbg_msg("erase LEB %d:%d", vol_id, lnum); ++ dbg_gen("erase LEB %d:%d", vol->vol_id, lnum); + + if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME) + return -EROFS; +@@ -481,7 +493,7 @@ + if (vol->upd_marker) + return -EBADF; + +- err = ubi_eba_unmap_leb(ubi, vol_id, lnum); ++ err = ubi_eba_unmap_leb(ubi, vol, lnum); + if (err) + return err; + +@@ -529,9 +541,8 @@ + { + struct ubi_volume *vol = desc->vol; + struct ubi_device *ubi = vol->ubi; +- int vol_id = vol->vol_id; + +- dbg_msg("unmap LEB %d:%d", vol_id, lnum); ++ dbg_gen("unmap LEB 
%d:%d", vol->vol_id, lnum); + + if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME) + return -EROFS; +@@ -542,11 +553,55 @@ + if (vol->upd_marker) + return -EBADF; + +- return ubi_eba_unmap_leb(ubi, vol_id, lnum); ++ return ubi_eba_unmap_leb(ubi, vol, lnum); + } + EXPORT_SYMBOL_GPL(ubi_leb_unmap); + + /** ++ * ubi_leb_map - map logical erasblock to a physical eraseblock. ++ * @desc: volume descriptor ++ * @lnum: logical eraseblock number ++ * @dtype: expected data type ++ * ++ * This function maps an un-mapped logical eraseblock @lnum to a physical ++ * eraseblock. This means, that after a successfull invocation of this ++ * function the logical eraseblock @lnum will be empty (contain only %0xFF ++ * bytes) and be mapped to a physical eraseblock, even if an unclean reboot ++ * happens. ++ * ++ * This function returns zero in case of success, %-EBADF if the volume is ++ * damaged because of an interrupted update, %-EBADMSG if the logical ++ * eraseblock is already mapped, and other negative error codes in case of ++ * other failures. ++ */ ++int ubi_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype) ++{ ++ struct ubi_volume *vol = desc->vol; ++ struct ubi_device *ubi = vol->ubi; ++ ++ dbg_gen("unmap LEB %d:%d", vol->vol_id, lnum); ++ ++ if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME) ++ return -EROFS; ++ ++ if (lnum < 0 || lnum >= vol->reserved_pebs) ++ return -EINVAL; ++ ++ if (dtype != UBI_LONGTERM && dtype != UBI_SHORTTERM && ++ dtype != UBI_UNKNOWN) ++ return -EINVAL; ++ ++ if (vol->upd_marker) ++ return -EBADF; ++ ++ if (vol->eba_tbl[lnum] >= 0) ++ return -EBADMSG; ++ ++ return ubi_eba_write_leb(ubi, vol, lnum, NULL, 0, 0, dtype); ++} ++EXPORT_SYMBOL_GPL(ubi_leb_map); ++ ++/** + * ubi_is_mapped - check if logical eraseblock is mapped. 
+ * @desc: volume descriptor + * @lnum: logical eraseblock number +@@ -566,7 +621,7 @@ + { + struct ubi_volume *vol = desc->vol; + +- dbg_msg("test LEB %d:%d", vol->vol_id, lnum); ++ dbg_gen("test LEB %d:%d", vol->vol_id, lnum); + + if (lnum < 0 || lnum >= vol->reserved_pebs) + return -EINVAL; +@@ -577,3 +632,27 @@ + return vol->eba_tbl[lnum] >= 0; + } + EXPORT_SYMBOL_GPL(ubi_is_mapped); ++ ++/** ++ * ubi_sync - synchronize UBI device buffers. ++ * @ubi_num: UBI device to synchronize ++ * ++ * The underlying MTD device may cache data in hardware or in software. This ++ * function ensures the caches are flushed. Returns zero in case of success and ++ * a negative error code in case of failure. ++ */ ++int ubi_sync(int ubi_num) ++{ ++ struct ubi_device *ubi; ++ ++ ubi = ubi_get_device(ubi_num); ++ if (!ubi) ++ return -ENODEV; ++ ++ if (ubi->mtd->sync) ++ ubi->mtd->sync(ubi->mtd); ++ ++ ubi_put_device(ubi); ++ return 0; ++} ++EXPORT_SYMBOL_GPL(ubi_sync); +diff -Nurd linux-2.6.24/drivers/mtd/ubi/misc.c ubifs-v2.6.24/drivers/mtd/ubi/misc.c +--- linux-2.6.24/drivers/mtd/ubi/misc.c 2008-01-25 00:58:37.000000000 +0200 ++++ ubifs-v2.6.24/drivers/mtd/ubi/misc.c 2009-04-07 17:14:47.000000000 +0200 +@@ -37,7 +37,7 @@ + { + int i; + +- ubi_assert(length % ubi->min_io_size == 0); ++ ubi_assert(!(length & (ubi->min_io_size - 1))); + + for (i = length - 1; i >= 0; i--) + if (((const uint8_t *)buf)[i] != 0xFF) +@@ -79,7 +79,7 @@ + else + size = vol->usable_leb_size; + +- err = ubi_eba_read_leb(ubi, vol_id, i, buf, 0, size, 1); ++ err = ubi_eba_read_leb(ubi, vol, i, buf, 0, size, 1); + if (err) { + if (err == -EBADMSG) + err = 1; +diff -Nurd linux-2.6.24/drivers/mtd/ubi/scan.c ubifs-v2.6.24/drivers/mtd/ubi/scan.c +--- linux-2.6.24/drivers/mtd/ubi/scan.c 2008-01-25 00:58:37.000000000 +0200 ++++ ubifs-v2.6.24/drivers/mtd/ubi/scan.c 2009-04-07 17:14:47.000000000 +0200 +@@ -19,9 +19,9 @@ + */ + + /* +- * UBI scanning unit. ++ * UBI scanning sub-system. 
+ * +- * This unit is responsible for scanning the flash media, checking UBI ++ * This sub-system is responsible for scanning the flash media, checking UBI + * headers and providing complete information about the UBI flash image. + * + * The scanning information is represented by a &struct ubi_scan_info' object. +@@ -92,29 +92,7 @@ + } + + /** +- * commit_to_mean_value - commit intermediate results to the final mean erase +- * counter value. +- * @si: scanning information +- * +- * This is a helper function which calculates partial mean erase counter mean +- * value and adds it to the resulting mean value. As we can work only in +- * integer arithmetic and we want to calculate the mean value of erase counter +- * accurately, we first sum erase counter values in @si->ec_sum variable and +- * count these components in @si->ec_count. If this temporary @si->ec_sum is +- * going to overflow, we calculate the partial mean value +- * (@si->ec_sum/@si->ec_count) and add it to @si->mean_ec. +- */ +-static void commit_to_mean_value(struct ubi_scan_info *si) +-{ +- si->ec_sum /= si->ec_count; +- if (si->ec_sum % si->ec_count >= si->ec_count / 2) +- si->mean_ec += 1; +- si->mean_ec += si->ec_sum; +-} +- +-/** +- * validate_vid_hdr - check that volume identifier header is correct and +- * consistent. ++ * validate_vid_hdr - check volume identifier header. + * @vid_hdr: the volume identifier header to check + * @sv: information about the volume this logical eraseblock belongs to + * @pnum: physical eraseblock number the VID header came from +@@ -123,7 +101,7 @@ + * non-zero if an inconsistency was found and zero if not. + * + * Note, UBI does sanity check of everything it reads from the flash media. +- * Most of the checks are done in the I/O unit. Here we check that the ++ * Most of the checks are done in the I/O sub-system. Here we check that the + * information in the VID header is consistent to the information in other VID + * headers of the same volume. 
+ */ +@@ -267,40 +245,21 @@ + struct ubi_vid_hdr *vh = NULL; + unsigned long long sqnum2 = be64_to_cpu(vid_hdr->sqnum); + +- if (seb->sqnum == 0 && sqnum2 == 0) { +- long long abs, v1 = seb->leb_ver, v2 = be32_to_cpu(vid_hdr->leb_ver); +- ++ if (sqnum2 == seb->sqnum) { + /* +- * UBI constantly increases the logical eraseblock version +- * number and it can overflow. Thus, we have to bear in mind +- * that versions that are close to %0xFFFFFFFF are less then +- * versions that are close to %0. +- * +- * The UBI WL unit guarantees that the number of pending tasks +- * is not greater then %0x7FFFFFFF. So, if the difference +- * between any two versions is greater or equivalent to +- * %0x7FFFFFFF, there was an overflow and the logical +- * eraseblock with lower version is actually newer then the one +- * with higher version. +- * +- * FIXME: but this is anyway obsolete and will be removed at +- * some point. ++ * This must be a really ancient UBI image which has been ++ * created before sequence numbers support has been added. At ++ * that times we used 32-bit LEB versions stored in logical ++ * eraseblocks. That was before UBI got into mainline. We do not ++ * support these images anymore. Well, those images will work ++ * still work, but only if no unclean reboots happened. + */ ++ ubi_err("unsupported on-flash UBI format\n"); ++ return -EINVAL; ++ } + +- dbg_bld("using old crappy leb_ver stuff"); +- +- abs = v1 - v2; +- if (abs < 0) +- abs = -abs; +- +- if (abs < 0x7FFFFFFF) +- /* Non-overflow situation */ +- second_is_newer = (v2 > v1); +- else +- second_is_newer = (v2 < v1); +- } else +- /* Obviously the LEB with lower sequence counter is older */ +- second_is_newer = sqnum2 > seb->sqnum; ++ /* Obviously the LEB with lower sequence counter is older */ ++ second_is_newer = !!(sqnum2 > seb->sqnum); + + /* + * Now we know which copy is newer. If the copy flag of the PEB with +@@ -308,7 +267,7 @@ + * check data CRC. 
For the second PEB we already have the VID header, + * for the first one - we'll need to re-read it from flash. + * +- * FIXME: this may be optimized so that we wouldn't read twice. ++ * Note: this may be optimized so that we wouldn't read twice. + */ + + if (second_is_newer) { +@@ -360,7 +319,7 @@ + } + + err = ubi_io_read_data(ubi, buf, pnum, 0, len); +- if (err && err != UBI_IO_BITFLIPS) ++ if (err && err != UBI_IO_BITFLIPS && err != -EBADMSG) + goto out_free_buf; + + data_crc = be32_to_cpu(vid_hdr->data_crc); +@@ -390,13 +349,11 @@ + vfree(buf); + out_free_vidh: + ubi_free_vid_hdr(ubi, vh); +- ubi_assert(err < 0); + return err; + } + + /** +- * ubi_scan_add_used - add information about a physical eraseblock to the +- * scanning information. ++ * ubi_scan_add_used - add physical eraseblock to the scanning information. + * @ubi: UBI device description object + * @si: scanning information + * @pnum: the physical eraseblock number +@@ -416,7 +373,6 @@ + int bitflips) + { + int err, vol_id, lnum; +- uint32_t leb_ver; + unsigned long long sqnum; + struct ubi_scan_volume *sv; + struct ubi_scan_leb *seb; +@@ -425,13 +381,12 @@ + vol_id = be32_to_cpu(vid_hdr->vol_id); + lnum = be32_to_cpu(vid_hdr->lnum); + sqnum = be64_to_cpu(vid_hdr->sqnum); +- leb_ver = be32_to_cpu(vid_hdr->leb_ver); + +- dbg_bld("PEB %d, LEB %d:%d, EC %d, sqnum %llu, ver %u, bitflips %d", +- pnum, vol_id, lnum, ec, sqnum, leb_ver, bitflips); ++ dbg_bld("PEB %d, LEB %d:%d, EC %d, sqnum %llu, bitflips %d", ++ pnum, vol_id, lnum, ec, sqnum, bitflips); + + sv = add_volume(si, vol_id, pnum, vid_hdr); +- if (IS_ERR(sv) < 0) ++ if (IS_ERR(sv)) + return PTR_ERR(sv); + + if (si->max_sqnum < sqnum) +@@ -461,25 +416,20 @@ + */ + + dbg_bld("this LEB already exists: PEB %d, sqnum %llu, " +- "LEB ver %u, EC %d", seb->pnum, seb->sqnum, +- seb->leb_ver, seb->ec); +- +- /* +- * Make sure that the logical eraseblocks have different +- * versions. Otherwise the image is bad. 
+- */ +- if (seb->leb_ver == leb_ver && leb_ver != 0) { +- ubi_err("two LEBs with same version %u", leb_ver); +- ubi_dbg_dump_seb(seb, 0); +- ubi_dbg_dump_vid_hdr(vid_hdr); +- return -EINVAL; +- } ++ "EC %d", seb->pnum, seb->sqnum, seb->ec); + + /* + * Make sure that the logical eraseblocks have different + * sequence numbers. Otherwise the image is bad. + * +- * FIXME: remove 'sqnum != 0' check when leb_ver is removed. ++ * However, if the sequence number is zero, we assume it must ++ * be an ancient UBI image from the era when UBI did not have ++ * sequence numbers. We still can attach these images, unless ++ * there is a need to distinguish between old and new ++ * eraseblocks, in which case we'll refuse the image in ++ * 'compare_lebs()'. In other words, we attach old clean ++ * images, but refuse attaching old images with duplicated ++ * logical eraseblocks because there was an unclean reboot. + */ + if (seb->sqnum == sqnum && sqnum != 0) { + ubi_err("two LEBs with same sequence number %llu", +@@ -519,7 +469,6 @@ + seb->pnum = pnum; + seb->scrub = ((cmp_res & 2) || bitflips); + seb->sqnum = sqnum; +- seb->leb_ver = leb_ver; + + if (sv->highest_lnum == lnum) + sv->last_data_size = +@@ -556,7 +505,6 @@ + seb->lnum = lnum; + seb->sqnum = sqnum; + seb->scrub = bitflips; +- seb->leb_ver = leb_ver; + + if (sv->highest_lnum <= lnum) { + sv->highest_lnum = lnum; +@@ -570,8 +518,7 @@ + } + + /** +- * ubi_scan_find_sv - find information about a particular volume in the +- * scanning information. ++ * ubi_scan_find_sv - find volume in the scanning information. + * @si: scanning information + * @vol_id: the requested volume ID + * +@@ -600,8 +547,7 @@ + } + + /** +- * ubi_scan_find_seb - find information about a particular logical +- * eraseblock in the volume scanning information. ++ * ubi_scan_find_seb - find LEB in the volume scanning information. 
+ * @sv: a pointer to the volume scanning information + * @lnum: the requested logical eraseblock + * +@@ -661,9 +607,9 @@ + * + * This function erases physical eraseblock 'pnum', and writes the erase + * counter header to it. This function should only be used on UBI device +- * initialization stages, when the EBA unit had not been yet initialized. This +- * function returns zero in case of success and a negative error code in case +- * of failure. ++ * initialization stages, when the EBA sub-system had not been yet initialized. ++ * This function returns zero in case of success and a negative error code in ++ * case of failure. + */ + int ubi_scan_erase_peb(struct ubi_device *ubi, const struct ubi_scan_info *si, + int pnum, int ec) +@@ -703,9 +649,10 @@ + * @si: scanning information + * + * This function returns a free physical eraseblock. It is supposed to be +- * called on the UBI initialization stages when the wear-leveling unit is not +- * initialized yet. This function picks a physical eraseblocks from one of the +- * lists, writes the EC header if it is needed, and removes it from the list. ++ * called on the UBI initialization stages when the wear-leveling sub-system is ++ * not initialized yet. This function picks a physical eraseblocks from one of ++ * the lists, writes the EC header if it is needed, and removes it from the ++ * list. + * + * This function returns scanning physical eraseblock information in case of + * success and an error code in case of failure. +@@ -758,8 +705,7 @@ + } + + /** +- * process_eb - read UBI headers, check them and add corresponding data +- * to the scanning information. ++ * process_eb - read, check UBI headers, and add them to scanning information. + * @ubi: UBI device description object + * @si: scanning information + * @pnum: the physical eraseblock number +@@ -767,9 +713,10 @@ + * This function returns a zero if the physical eraseblock was successfully + * handled and a negative error code in case of failure. 
+ */ +-static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, int pnum) ++static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, ++ int pnum) + { +- long long ec; ++ long long uninitialized_var(ec); + int err, bitflips = 0, vol_id, ec_corr = 0; + + dbg_bld("scan PEB %d", pnum); +@@ -780,8 +727,9 @@ + return err; + else if (err) { + /* +- * FIXME: this is actually duty of the I/O unit to initialize +- * this, but MTD does not provide enough information. ++ * FIXME: this is actually duty of the I/O sub-system to ++ * initialize this, but MTD does not provide enough ++ * information. + */ + si->bad_peb_count += 1; + return 0; +@@ -854,7 +802,7 @@ + } + + vol_id = be32_to_cpu(vidh->vol_id); +- if (vol_id > UBI_MAX_VOLUMES && vol_id != UBI_LAYOUT_VOL_ID) { ++ if (vol_id > UBI_MAX_VOLUMES && vol_id != UBI_LAYOUT_VOLUME_ID) { + int lnum = be32_to_cpu(vidh->lnum); + + /* Unsupported internal volume */ +@@ -897,15 +845,8 @@ + + adjust_mean_ec: + if (!ec_corr) { +- if (si->ec_sum + ec < ec) { +- commit_to_mean_value(si); +- si->ec_sum = 0; +- si->ec_count = 0; +- } else { +- si->ec_sum += ec; +- si->ec_count += 1; +- } +- ++ si->ec_sum += ec; ++ si->ec_count += 1; + if (ec > si->max_ec) + si->max_ec = ec; + if (ec < si->min_ec) +@@ -953,7 +894,7 @@ + for (pnum = 0; pnum < ubi->peb_count; pnum++) { + cond_resched(); + +- dbg_msg("process PEB %d", pnum); ++ dbg_gen("process PEB %d", pnum); + err = process_eb(ubi, si, pnum); + if (err < 0) + goto out_vidh; +@@ -961,9 +902,9 @@ + + dbg_msg("scanning is finished"); + +- /* Finish mean erase counter calculations */ ++ /* Calculate mean erase counter */ + if (si->ec_count) +- commit_to_mean_value(si); ++ si->mean_ec = div_u64(si->ec_sum, si->ec_count); + + if (si->is_empty) + ubi_msg("empty MTD device detected"); +@@ -1100,8 +1041,7 @@ + #ifdef CONFIG_MTD_UBI_DEBUG_PARANOID + + /** +- * paranoid_check_si - check if the scanning information is correct and +- * consistent. 
++ * paranoid_check_si - check the scanning information. + * @ubi: UBI device description object + * @si: scanning information + * +@@ -1286,11 +1226,6 @@ + ubi_err("bad data_pad %d", sv->data_pad); + goto bad_vid_hdr; + } +- +- if (seb->leb_ver != be32_to_cpu(vidh->leb_ver)) { +- ubi_err("bad leb_ver %u", seb->leb_ver); +- goto bad_vid_hdr; +- } + } + + if (!last_seb) +@@ -1320,8 +1255,7 @@ + if (err < 0) { + kfree(buf); + return err; +- } +- else if (err) ++ } else if (err) + buf[pnum] = 1; + } + +diff -Nurd linux-2.6.24/drivers/mtd/ubi/scan.h ubifs-v2.6.24/drivers/mtd/ubi/scan.h +--- linux-2.6.24/drivers/mtd/ubi/scan.h 2008-01-25 00:58:37.000000000 +0200 ++++ ubifs-v2.6.24/drivers/mtd/ubi/scan.h 2009-04-07 17:14:47.000000000 +0200 +@@ -34,7 +34,6 @@ + * @u: unions RB-tree or @list links + * @u.rb: link in the per-volume RB-tree of &struct ubi_scan_leb objects + * @u.list: link in one of the eraseblock lists +- * @leb_ver: logical eraseblock version (obsolete) + * + * One object of this type is allocated for each physical eraseblock during + * scanning. 
+@@ -49,7 +48,6 @@ + struct rb_node rb; + struct list_head list; + } u; +- uint32_t leb_ver; + }; + + /** +@@ -59,16 +57,16 @@ + * @leb_count: number of logical eraseblocks in this volume + * @vol_type: volume type + * @used_ebs: number of used logical eraseblocks in this volume (only for +- * static volumes) ++ * static volumes) + * @last_data_size: amount of data in the last logical eraseblock of this +- * volume (always equivalent to the usable logical eraseblock size in case of +- * dynamic volumes) ++ * volume (always equivalent to the usable logical eraseblock ++ * size in case of dynamic volumes) + * @data_pad: how many bytes at the end of logical eraseblocks of this volume +- * are not used (due to volume alignment) ++ * are not used (due to volume alignment) + * @compat: compatibility flags of this volume + * @rb: link in the volume RB-tree + * @root: root of the RB-tree containing all the eraseblock belonging to this +- * volume (&struct ubi_scan_leb objects) ++ * volume (&struct ubi_scan_leb objects) + * + * One object of this type is allocated for each volume during scanning. + */ +@@ -92,8 +90,8 @@ + * @free: list of free physical eraseblocks + * @erase: list of physical eraseblocks which have to be erased + * @alien: list of physical eraseblocks which should not be used by UBI (e.g., ++ * those belonging to "preserve"-compatible internal volumes) + * @bad_peb_count: count of bad physical eraseblocks +- * those belonging to "preserve"-compatible internal volumes) + * @vols_found: number of volumes found during scanning + * @highest_vol_id: highest volume ID + * @alien_peb_count: count of physical eraseblocks in the @alien list +@@ -106,8 +104,8 @@ + * @ec_count: a temporary variable used when calculating @mean_ec + * + * This data structure contains the result of scanning and may be used by other +- * UBI units to build final UBI data structures, further error-recovery and so +- * on. 
++ * UBI sub-systems to build final UBI data structures, further error-recovery ++ * and so on. + */ + struct ubi_scan_info { + struct rb_root volumes; +@@ -124,7 +122,7 @@ + int max_ec; + unsigned long long max_sqnum; + int mean_ec; +- int ec_sum; ++ uint64_t ec_sum; + int ec_count; + }; + +@@ -132,8 +130,7 @@ + struct ubi_vid_hdr; + + /* +- * ubi_scan_move_to_list - move a physical eraseblock from the volume tree to a +- * list. ++ * ubi_scan_move_to_list - move a PEB from the volume tree to a list. + * + * @sv: volume scanning information + * @seb: scanning eraseblock infprmation +diff -Nurd linux-2.6.24/drivers/mtd/ubi/ubi-media.h ubifs-v2.6.24/drivers/mtd/ubi/ubi-media.h +--- linux-2.6.24/drivers/mtd/ubi/ubi-media.h 1970-01-01 02:00:00.000000000 +0200 ++++ ubifs-v2.6.24/drivers/mtd/ubi/ubi-media.h 2009-04-07 17:14:47.000000000 +0200 +@@ -0,0 +1,368 @@ ++/* ++ * Copyright (c) International Business Machines Corp., 2006 ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See ++ * the GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ * ++ * Authors: Artem Bityutskiy (Битюцкий Артём) ++ * Thomas Gleixner ++ * Frank Haverkamp ++ * Oliver Lohmann ++ * Andreas Arnez ++ */ ++ ++/* ++ * This file defines the layout of UBI headers and all the other UBI on-flash ++ * data structures. 
++ */ ++ ++#ifndef __UBI_MEDIA_H__ ++#define __UBI_MEDIA_H__ ++ ++#include <asm/byteorder.h> ++ ++/* The version of UBI images supported by this implementation */ ++#define UBI_VERSION 1 ++ ++/* The highest erase counter value supported by this implementation */ ++#define UBI_MAX_ERASECOUNTER 0x7FFFFFFF ++ ++/* The initial CRC32 value used when calculating CRC checksums */ ++#define UBI_CRC32_INIT 0xFFFFFFFFU ++ ++/* Erase counter header magic number (ASCII "UBI#") */ ++#define UBI_EC_HDR_MAGIC 0x55424923 ++/* Volume identifier header magic number (ASCII "UBI!") */ ++#define UBI_VID_HDR_MAGIC 0x55424921 ++ ++/* ++ * Volume type constants used in the volume identifier header. ++ * ++ * @UBI_VID_DYNAMIC: dynamic volume ++ * @UBI_VID_STATIC: static volume ++ */ ++enum { ++ UBI_VID_DYNAMIC = 1, ++ UBI_VID_STATIC = 2 ++}; ++ ++/* ++ * Volume flags used in the volume table record. ++ * ++ * @UBI_VTBL_AUTORESIZE_FLG: auto-resize this volume ++ * ++ * %UBI_VTBL_AUTORESIZE_FLG flag can be set only for one volume in the volume ++ * table. UBI automatically re-sizes the volume which has this flag and makes ++ * the volume to be of largest possible size. This means that if after the ++ * initialization UBI finds out that there are available physical eraseblocks ++ * present on the device, it automatically appends all of them to the volume ++ * (the physical eraseblocks reserved for bad eraseblocks handling and other ++ * reserved physical eraseblocks are not taken). So, if there is a volume with ++ * the %UBI_VTBL_AUTORESIZE_FLG flag set, the amount of available logical ++ * eraseblocks will be zero after UBI is loaded, because all of them will be ++ * reserved for this volume. Note, the %UBI_VTBL_AUTORESIZE_FLG bit is cleared ++ * after the volume had been initialized. ++ * ++ * The auto-resize feature is useful for device production purposes. 
For ++ * example, different NAND flash chips may have different amount of initial bad ++ * eraseblocks, depending on particular chip instance. Manufacturers of NAND ++ * chips usually guarantee that the amount of initial bad eraseblocks does not ++ * exceed certain percent, e.g. 2%. When one creates an UBI image which will be ++ * flashed to the end devices in production, he does not know the exact amount ++ * of good physical eraseblocks the NAND chip on the device will have, but this ++ * number is required to calculate the volume sizes and put them to the volume ++ * table of the UBI image. In this case, one of the volumes (e.g., the one ++ * which will store the root file system) is marked as "auto-resizable", and ++ * UBI will adjust its size on the first boot if needed. ++ * ++ * Note, first UBI reserves some amount of physical eraseblocks for bad ++ * eraseblock handling, and then re-sizes the volume, not vice-versa. This ++ * means that the pool of reserved physical eraseblocks will always be present. ++ */ ++enum { ++ UBI_VTBL_AUTORESIZE_FLG = 0x01, ++}; ++ ++/* ++ * Compatibility constants used by internal volumes.
++ * ++ * @UBI_COMPAT_DELETE: delete this internal volume before anything is written ++ * to the flash ++ * @UBI_COMPAT_RO: attach this device in read-only mode ++ * @UBI_COMPAT_PRESERVE: preserve this internal volume - do not touch its ++ * physical eraseblocks, don't allow the wear-leveling ++ * sub-system to move them ++ * @UBI_COMPAT_REJECT: reject this UBI image ++ */ ++enum { ++ UBI_COMPAT_DELETE = 1, ++ UBI_COMPAT_RO = 2, ++ UBI_COMPAT_PRESERVE = 4, ++ UBI_COMPAT_REJECT = 5 ++}; ++ ++/* Sizes of UBI headers */ ++#define UBI_EC_HDR_SIZE sizeof(struct ubi_ec_hdr) ++#define UBI_VID_HDR_SIZE sizeof(struct ubi_vid_hdr) ++ ++/* Sizes of UBI headers without the ending CRC */ ++#define UBI_EC_HDR_SIZE_CRC (UBI_EC_HDR_SIZE - sizeof(__be32)) ++#define UBI_VID_HDR_SIZE_CRC (UBI_VID_HDR_SIZE - sizeof(__be32)) ++ ++/** ++ * struct ubi_ec_hdr - UBI erase counter header. ++ * @magic: erase counter header magic number (%UBI_EC_HDR_MAGIC) ++ * @version: version of UBI implementation which is supposed to accept this ++ * UBI image ++ * @padding1: reserved for future, zeroes ++ * @ec: the erase counter ++ * @vid_hdr_offset: where the VID header starts ++ * @data_offset: where the user data start ++ * @padding2: reserved for future, zeroes ++ * @hdr_crc: erase counter header CRC checksum ++ * ++ * The erase counter header takes 64 bytes and has a plenty of unused space for ++ * future usage. The unused fields are zeroed. The @version field is used to ++ * indicate the version of UBI implementation which is supposed to be able to ++ * work with this UBI image. If @version is greater than the current UBI ++ * version, the image is rejected. This may be useful in future if something ++ * is changed radically. This field is duplicated in the volume identifier ++ * header. ++ * ++ * The @vid_hdr_offset and @data_offset fields contain the offset of the ++ * volume identifier header and user data, relative to the beginning of the ++ * physical eraseblock.
These values have to be the same for all physical ++ * eraseblocks. ++ */ ++struct ubi_ec_hdr { ++ __be32 magic; ++ __u8 version; ++ __u8 padding1[3]; ++ __be64 ec; /* Warning: the current limit is 31-bit anyway! */ ++ __be32 vid_hdr_offset; ++ __be32 data_offset; ++ __u8 padding2[36]; ++ __be32 hdr_crc; ++} __attribute__ ((packed)); ++ ++/** ++ * struct ubi_vid_hdr - on-flash UBI volume identifier header. ++ * @magic: volume identifier header magic number (%UBI_VID_HDR_MAGIC) ++ * @version: UBI implementation version which is supposed to accept this UBI ++ * image (%UBI_VERSION) ++ * @vol_type: volume type (%UBI_VID_DYNAMIC or %UBI_VID_STATIC) ++ * @copy_flag: if this logical eraseblock was copied from another physical ++ * eraseblock (for wear-leveling reasons) ++ * @compat: compatibility of this volume (%0, %UBI_COMPAT_DELETE, ++ * %UBI_COMPAT_RO, %UBI_COMPAT_PRESERVE, or %UBI_COMPAT_REJECT) ++ * @vol_id: ID of this volume ++ * @lnum: logical eraseblock number ++ * @padding1: reserved for future, zeroes ++ * @data_size: how many bytes of data this logical eraseblock contains ++ * @used_ebs: total number of used logical eraseblocks in this volume ++ * @data_pad: how many bytes at the end of this physical eraseblock are not ++ * used ++ * @data_crc: CRC checksum of the data stored in this logical eraseblock ++ * @padding2: reserved for future, zeroes ++ * @sqnum: sequence number ++ * @padding3: reserved for future, zeroes ++ * @hdr_crc: volume identifier header CRC checksum ++ * ++ * The @sqnum is the value of the global sequence counter at the time when this ++ * VID header was created. The global sequence counter is incremented each time ++ * UBI writes a new VID header to the flash, i.e. when it maps a logical ++ * eraseblock to a new physical eraseblock. The global sequence counter is an ++ * unsigned 64-bit integer and we assume it never overflows.
The @sqnum ++ * (sequence number) is used to distinguish between older and newer versions of ++ * logical eraseblocks. ++ * ++ * There are 2 situations when there may be more than one physical eraseblock ++ * corresponding to the same logical eraseblock, i.e., having the same @vol_id ++ * and @lnum values in the volume identifier header. Suppose we have a logical ++ * eraseblock L and it is mapped to the physical eraseblock P. ++ * ++ * 1. Because UBI may erase physical eraseblocks asynchronously, the following ++ * situation is possible: L is asynchronously erased, so P is scheduled for ++ * erasure, then L is written to, i.e. mapped to another physical eraseblock P1, ++ * so P1 is written to, then an unclean reboot happens. Result - there are 2 ++ * physical eraseblocks P and P1 corresponding to the same logical eraseblock ++ * L. But P1 has greater sequence number, so UBI picks P1 when it attaches the ++ * flash. ++ * ++ * 2. From time to time UBI moves logical eraseblocks to other physical ++ * eraseblocks for wear-leveling reasons. If, for example, UBI moves L from P ++ * to P1, and an unclean reboot happens before P is physically erased, there ++ * are two physical eraseblocks P and P1 corresponding to L and UBI has to ++ * select one of them when the flash is attached. The @sqnum field says which ++ * PEB is the original (obviously P will have lower @sqnum) and the copy. But ++ * it is not enough to select the physical eraseblock with the higher sequence ++ * number, because the unclean reboot could have happened in the middle of the ++ * copying process, so the data in P1 is corrupted. It is also not enough to ++ * just select the physical eraseblock with lower sequence number, because the ++ * data there may be old (consider a case if more data was added to P1 after ++ * the copying). Moreover, the unclean reboot may happen when the erasure of P ++ * was just started, so it results in unstable P, which is "mostly" OK, but ++ * still has unstable bits.
++ * ++ * UBI uses the @copy_flag field to indicate that this logical eraseblock is a ++ * copy. UBI also calculates data CRC when the data is moved and stores it at ++ * the @data_crc field of the copy (P1). So when UBI needs to pick one physical ++ * eraseblock of two (P or P1), the @copy_flag of the newer one (P1) is ++ * examined. If it is cleared, the situation is simple and the newer one is ++ * picked. If it is set, the data CRC of the copy (P1) is examined. If the CRC ++ * checksum is correct, this physical eraseblock is selected (P1). Otherwise ++ * the older one (P) is selected. ++ * ++ * There are 2 sorts of volumes in UBI: user volumes and internal volumes. ++ * Internal volumes are not seen from outside and are used for various internal ++ * UBI purposes. In this implementation there is only one internal volume - the ++ * layout volume. Internal volumes are the main mechanism of UBI extensions. ++ * For example, in future one may introduce a journal internal volume. Internal ++ * volumes have their own reserved range of IDs. ++ * ++ * The @compat field is only used for internal volumes and contains the "degree ++ * of their compatibility". It is always zero for user volumes. This field ++ * provides a mechanism to introduce UBI extensions and to be still compatible ++ * with older UBI binaries. For example, if someone introduced a journal in ++ * future, he would probably use %UBI_COMPAT_DELETE compatibility for the ++ * journal volume. And in this case, older UBI binaries, which know nothing ++ * about the journal volume, would just delete this volume and work perfectly ++ * fine. This is similar to what Ext2fs does when it is fed by an Ext3fs image ++ * - it just ignores the Ext3fs journal. ++ * ++ * The @data_crc field contains the CRC checksum of the contents of the logical ++ * eraseblock if this is a static volume. In case of dynamic volumes, it does ++ * not contain the CRC checksum as a rule.
The only exception is when the ++ * data of the physical eraseblock was moved by the wear-leveling sub-system, ++ * then the wear-leveling sub-system calculates the data CRC and stores it in ++ * the @data_crc field. And of course, the @copy_flag is set in this case. ++ * ++ * The @data_size field is used only for static volumes because UBI has to know ++ * how many bytes of data are stored in this eraseblock. For dynamic volumes, ++ * this field usually contains zero. The only exception is when the data of the ++ * physical eraseblock was moved to another physical eraseblock for ++ * wear-leveling reasons. In this case, UBI calculates CRC checksum of the ++ * contents and uses both @data_crc and @data_size fields. In this case, the ++ * @data_size field contains data size. ++ * ++ * The @used_ebs field is used only for static volumes and indicates how many ++ * eraseblocks the data of the volume takes. For dynamic volumes this field is ++ * not used and always contains zero. ++ * ++ * The @data_pad is calculated when volumes are created using the alignment ++ * parameter. So, effectively, the @data_pad field reduces the size of logical ++ * eraseblocks of this volume. This is very handy when one uses block-oriented ++ * software (say, cramfs) on top of the UBI volume. ++ */ ++struct ubi_vid_hdr { ++ __be32 magic; ++ __u8 version; ++ __u8 vol_type; ++ __u8 copy_flag; ++ __u8 compat; ++ __be32 vol_id; ++ __be32 lnum; ++ __u8 padding1[4]; ++ __be32 data_size; ++ __be32 used_ebs; ++ __be32 data_pad; ++ __be32 data_crc; ++ __u8 padding2[4]; ++ __be64 sqnum; ++ __u8 padding3[12]; ++ __be32 hdr_crc; ++} __attribute__ ((packed)); ++ ++/* Internal UBI volumes count */ ++#define UBI_INT_VOL_COUNT 1 ++ ++/* ++ * Starting ID of internal volumes. There is reserved room for 4096 internal ++ * volumes.
++ */ ++#define UBI_INTERNAL_VOL_START (0x7FFFFFFF - 4096) ++ ++/* The layout volume contains the volume table */ ++ ++#define UBI_LAYOUT_VOLUME_ID UBI_INTERNAL_VOL_START ++#define UBI_LAYOUT_VOLUME_TYPE UBI_VID_DYNAMIC ++#define UBI_LAYOUT_VOLUME_ALIGN 1 ++#define UBI_LAYOUT_VOLUME_EBS 2 ++#define UBI_LAYOUT_VOLUME_NAME "layout volume" ++#define UBI_LAYOUT_VOLUME_COMPAT UBI_COMPAT_REJECT ++ ++/* The maximum number of volumes per one UBI device */ ++#define UBI_MAX_VOLUMES 128 ++ ++/* The maximum volume name length */ ++#define UBI_VOL_NAME_MAX 127 ++ ++/* Size of the volume table record */ ++#define UBI_VTBL_RECORD_SIZE sizeof(struct ubi_vtbl_record) ++ ++/* Size of the volume table record without the ending CRC */ ++#define UBI_VTBL_RECORD_SIZE_CRC (UBI_VTBL_RECORD_SIZE - sizeof(__be32)) ++ ++/** ++ * struct ubi_vtbl_record - a record in the volume table. ++ * @reserved_pebs: how many physical eraseblocks are reserved for this volume ++ * @alignment: volume alignment ++ * @data_pad: how many bytes are unused at the end of the each physical ++ * eraseblock to satisfy the requested alignment ++ * @vol_type: volume type (%UBI_DYNAMIC_VOLUME or %UBI_STATIC_VOLUME) ++ * @upd_marker: if volume update was started but not finished ++ * @name_len: volume name length ++ * @name: the volume name ++ * @flags: volume flags (%UBI_VTBL_AUTORESIZE_FLG) ++ * @padding: reserved, zeroes ++ * @crc: a CRC32 checksum of the record ++ * ++ * The volume table records are stored in the volume table, which is stored in ++ * the layout volume. The layout volume consists of 2 logical eraseblock, each ++ * of which contains a copy of the volume table (i.e., the volume table is ++ * duplicated). The volume table is an array of &struct ubi_vtbl_record ++ * objects indexed by the volume ID. ++ * ++ * If the size of the logical eraseblock is large enough to fit ++ * %UBI_MAX_VOLUMES records, the volume table contains %UBI_MAX_VOLUMES ++ * records. 
Otherwise, it contains as many records as it can fit (i.e., size of ++ * logical eraseblock divided by sizeof(struct ubi_vtbl_record)). ++ * ++ * The @upd_marker flag is used to implement volume update. It is set to %1 ++ * before update and set to %0 after the update. So if the update operation was ++ * interrupted, UBI knows that the volume is corrupted. ++ * ++ * The @alignment field is specified when the volume is created and cannot be ++ * later changed. It may be useful, for example, when a block-oriented file ++ * system works on top of UBI. The @data_pad field is calculated using the ++ * logical eraseblock size and @alignment. The alignment must be multiple to the ++ * minimal flash I/O unit. If @alignment is 1, all the available space of ++ * the physical eraseblocks is used. ++ * ++ * Empty records contain all zeroes and the CRC checksum of those zeroes. ++ */ ++struct ubi_vtbl_record { ++ __be32 reserved_pebs; ++ __be32 alignment; ++ __be32 data_pad; ++ __u8 vol_type; ++ __u8 upd_marker; ++ __be16 name_len; ++ __u8 name[UBI_VOL_NAME_MAX+1]; ++ __u8 flags; ++ __u8 padding[23]; ++ __be32 crc; ++} __attribute__ ((packed)); ++ ++#endif /* !__UBI_MEDIA_H__ */ +diff -Nurd linux-2.6.24/drivers/mtd/ubi/ubi.h ubifs-v2.6.24/drivers/mtd/ubi/ubi.h +--- linux-2.6.24/drivers/mtd/ubi/ubi.h 2008-01-25 00:58:37.000000000 +0200 ++++ ubifs-v2.6.24/drivers/mtd/ubi/ubi.h 2009-04-07 17:14:47.000000000 +0200 +@@ -37,10 +37,9 @@ + #include <linux/string.h> + #include <linux/vmalloc.h> + #include <linux/mtd/mtd.h> +- +-#include <mtd/ubi-header.h> + #include <linux/mtd/ubi.h> + ++#include "ubi-media.h" + #include "scan.h" + #include "debug.h" + +@@ -75,15 +74,22 @@ + #define UBI_IO_RETRIES 3 + + /* +- * Error codes returned by the I/O unit. ++ * Length of the protection queue. The length is effectively equivalent to the ++ * number of (global) erase cycles PEBs are protected from the wear-leveling ++ * worker. 
++ */ ++#define UBI_PROT_QUEUE_LEN 10 ++ ++/* ++ * Error codes returned by the I/O sub-system. + * + * UBI_IO_PEB_EMPTY: the physical eraseblock is empty, i.e. it contains only +- * 0xFF bytes ++ * %0xFF bytes + * UBI_IO_PEB_FREE: the physical eraseblock is free, i.e. it contains only a +- * valid erase counter header, and the rest are %0xFF bytes ++ * valid erase counter header, and the rest are %0xFF bytes + * UBI_IO_BAD_EC_HDR: the erase counter header is corrupted (bad magic or CRC) + * UBI_IO_BAD_VID_HDR: the volume identifier header is corrupted (bad magic or +- * CRC) ++ * CRC) + * UBI_IO_BITFLIPS: bit-flips were detected and corrected + */ + enum { +@@ -94,8 +100,68 @@ + UBI_IO_BITFLIPS + }; + +-extern int ubi_devices_cnt; +-extern struct ubi_device *ubi_devices[]; ++/** ++ * struct ubi_wl_entry - wear-leveling entry. ++ * @u.rb: link in the corresponding (free/used) RB-tree ++ * @u.list: link in the protection queue ++ * @ec: erase counter ++ * @pnum: physical eraseblock number ++ * ++ * This data structure is used in the WL sub-system. Each physical eraseblock ++ * has a corresponding &struct ubi_wl_entry object which may be kept in different ++ * RB-trees. See WL sub-system for details. ++ */ ++struct ubi_wl_entry { ++ union { ++ struct rb_node rb; ++ struct list_head list; ++ } u; ++ int ec; ++ int pnum; ++}; ++ ++/** ++ * struct ubi_ltree_entry - an entry in the lock tree. ++ * @rb: links RB-tree nodes ++ * @vol_id: volume ID of the locked logical eraseblock ++ * @lnum: locked logical eraseblock number ++ * @users: how many tasks are using this logical eraseblock or wait for it ++ * @mutex: read/write mutex to implement read/write access serialization to ++ * the (@vol_id, @lnum) logical eraseblock ++ * ++ * This data structure is used in the EBA sub-system to implement per-LEB ++ * locking. When a logical eraseblock is being locked - corresponding ++ * &struct ubi_ltree_entry object is inserted to the lock tree (@ubi->ltree).
++ * See EBA sub-system for details. ++ */ ++struct ubi_ltree_entry { ++ struct rb_node rb; ++ int vol_id; ++ int lnum; ++ int users; ++ struct rw_semaphore mutex; ++}; ++ ++/** ++ * struct ubi_rename_entry - volume re-name description data structure. ++ * @new_name_len: new volume name length ++ * @new_name: new volume name ++ * @remove: if not zero, this volume should be removed, not re-named ++ * @desc: descriptor of the volume ++ * @list: links re-name entries into a list ++ * ++ * This data structure is utilized in the multiple volume re-name code. Namely, ++ * UBI first creates a list of &struct ubi_rename_entry objects from the ++ * &struct ubi_rnvol_req request object, and then utilizes this list to do all ++ * the job. ++ */ ++struct ubi_rename_entry { ++ int new_name_len; ++ char new_name[UBI_VOL_NAME_MAX + 1]; ++ int remove; ++ struct ubi_volume_desc *desc; ++ struct list_head list; ++}; + + struct ubi_volume_desc; + +@@ -105,11 +171,10 @@ + * @cdev: character device object to create character device + * @ubi: reference to the UBI device description object + * @vol_id: volume ID ++ * @ref_count: volume reference count + * @readers: number of users holding this volume in read-only mode + * @writers: number of users holding this volume in read-write mode + * @exclusive: whether somebody holds this volume in exclusive mode +- * @removed: if the volume was removed +- * @checked: if this static volume was checked + * + * @reserved_pebs: how many physical eraseblocks are reserved for this volume + * @vol_type: volume type (%UBI_DYNAMIC_VOLUME or %UBI_STATIC_VOLUME) +@@ -117,21 +182,31 @@ + * @used_ebs: how many logical eraseblocks in this volume contain data + * @last_eb_bytes: how many bytes are stored in the last logical eraseblock + * @used_bytes: how many bytes of data this volume contains +- * @upd_marker: non-zero if the update marker is set for this volume +- * @corrupted: non-zero if the volume is corrupted (static volumes only) + * @alignment: volume 
alignment + * @data_pad: how many bytes are not used at the end of physical eraseblocks to +- * satisfy the requested alignment ++ * satisfy the requested alignment + * @name_len: volume name length + * @name: volume name + * +- * @updating: whether the volume is being updated + * @upd_ebs: how many eraseblocks are expected to be updated +- * @upd_bytes: how many bytes are expected to be received +- * @upd_received: how many update bytes were already received +- * @upd_buf: update buffer which is used to collect update data ++ * @ch_lnum: LEB number which is being changing by the atomic LEB change ++ * operation ++ * @ch_dtype: data persistency type which is being changing by the atomic LEB ++ * change operation ++ * @upd_bytes: how many bytes are expected to be received for volume update or ++ * atomic LEB change ++ * @upd_received: how many bytes were already received for volume update or ++ * atomic LEB change ++ * @upd_buf: update buffer which is used to collect update data or data for ++ * atomic LEB change + * + * @eba_tbl: EBA table of this volume (LEB->PEB mapping) ++ * @checked: %1 if this static volume was checked ++ * @corrupted: %1 if the volume is corrupted (static volumes only) ++ * @upd_marker: %1 if the update marker is set for this volume ++ * @updating: %1 if the volume is being updated ++ * @changing_leb: %1 if the atomic LEB change ioctl command is in progress ++ * @direct_writes: %1 if direct writes are enabled for this volume + * + * @gluebi_desc: gluebi UBI volume descriptor + * @gluebi_refcount: reference count of the gluebi MTD device +@@ -150,11 +225,10 @@ + struct cdev cdev; + struct ubi_device *ubi; + int vol_id; ++ int ref_count; + int readers; + int writers; + int exclusive; +- int removed; +- int checked; + + int reserved_pebs; + int vol_type; +@@ -162,23 +236,32 @@ + int used_ebs; + int last_eb_bytes; + long long used_bytes; +- int upd_marker; +- int corrupted; + int alignment; + int data_pad; + int name_len; +- char 
name[UBI_VOL_NAME_MAX+1]; ++ char name[UBI_VOL_NAME_MAX + 1]; + +- int updating; + int upd_ebs; ++ int ch_lnum; ++ int ch_dtype; + long long upd_bytes; + long long upd_received; + void *upd_buf; + + int *eba_tbl; ++ unsigned int checked:1; ++ unsigned int corrupted:1; ++ unsigned int upd_marker:1; ++ unsigned int updating:1; ++ unsigned int changing_leb:1; ++ unsigned int direct_writes:1; + + #ifdef CONFIG_MTD_UBI_GLUEBI +- /* Gluebi-related stuff may be compiled out */ ++ /* ++ * Gluebi-related stuff may be compiled out. ++ * Note: this should not be built into UBI but should be a separate ++ * ubimtd driver which works on top of UBI and emulates MTD devices. ++ */ + struct ubi_volume_desc *gluebi_desc; + int gluebi_refcount; + struct mtd_info gluebi_mtd; +@@ -186,8 +269,7 @@ + }; + + /** +- * struct ubi_volume_desc - descriptor of the UBI volume returned when it is +- * opened. ++ * struct ubi_volume_desc - UBI volume descriptor returned when it is opened. + * @vol: reference to the corresponding volume description object + * @mode: open mode (%UBI_READONLY, %UBI_READWRITE, or %UBI_EXCLUSIVE) + */ +@@ -200,28 +282,32 @@ + + /** + * struct ubi_device - UBI device description structure +- * @dev: class device object to use the the Linux device model ++ * @dev: UBI device object to use the the Linux device model + * @cdev: character device object to create character device + * @ubi_num: UBI device number + * @ubi_name: UBI device name +- * @major: character device major number + * @vol_count: number of volumes in this UBI device + * @volumes: volumes of this UBI device + * @volumes_lock: protects @volumes, @rsvd_pebs, @avail_pebs, beb_rsvd_pebs, +- * @beb_rsvd_level, @bad_peb_count, @good_peb_count, @vol_count, @vol->readers, +- * @vol->writers, @vol->exclusive, @vol->removed, @vol->mapping and +- * @vol->eba_tbl. 
++ * @beb_rsvd_level, @bad_peb_count, @good_peb_count, @vol_count, ++ * @vol->readers, @vol->writers, @vol->exclusive, ++ * @vol->ref_count, @vol->mapping and @vol->eba_tbl. ++ * @ref_count: count of references on the UBI device + * + * @rsvd_pebs: count of reserved physical eraseblocks + * @avail_pebs: count of available physical eraseblocks + * @beb_rsvd_pebs: how many physical eraseblocks are reserved for bad PEB +- * handling ++ * handling + * @beb_rsvd_level: normal level of PEBs reserved for bad PEB handling + * ++ * @autoresize_vol_id: ID of the volume which has to be auto-resized at the end ++ * of UBI initialization + * @vtbl_slots: how many slots are available in the volume table + * @vtbl_size: size of the volume table in bytes + * @vtbl: in-RAM volume table copy +- * @vtbl_mutex: protects on-flash volume table ++ * @volumes_mutex: protects on-flash volume table and serializes volume ++ * changes, like creation, deletion, update, re-size, ++ * re-name and set property + * + * @max_ec: current highest erase counter value + * @mean_ec: current mean erase counter value +@@ -234,19 +320,19 @@ + * @used: RB-tree of used physical eraseblocks + * @free: RB-tree of free physical eraseblocks + * @scrub: RB-tree of physical eraseblocks which need scrubbing +- * @prot: protection trees +- * @prot.pnum: protection tree indexed by physical eraseblock numbers +- * @prot.aec: protection tree indexed by absolute erase counter value +- * @wl_lock: protects the @used, @free, @prot, @lookuptbl, @abs_ec, @move_from, +- * @move_to, @move_to_put @erase_pending, @wl_scheduled, and @works +- * fields ++ * @pq: protection queue (contain physical eraseblocks which are temporarily ++ * protected from the wear-leveling worker) ++ * @pq_head: protection queue head ++ * @wl_lock: protects the @used, @free, @pq, @pq_head, @lookuptbl, @move_from, ++ * @move_to, @move_to_put @erase_pending, @wl_scheduled and @works ++ * fields ++ * @move_mutex: serializes eraseblock moves ++ * 
@work_sem: synchronizes the WL worker with use tasks + * @wl_scheduled: non-zero if the wear-leveling was scheduled + * @lookuptbl: a table to quickly find a &struct ubi_wl_entry object for any +- * physical eraseblock +- * @abs_ec: absolute erase counter ++ * physical eraseblock + * @move_from: physical eraseblock from where the data is being moved + * @move_to: physical eraseblock where the data is being moved to +- * @move_from_put: if the "from" PEB was put + * @move_to_put: if the "to" PEB was put + * @works: list of pending works + * @works_count: count of pending works +@@ -264,68 +350,70 @@ + * @ro_mode: if the UBI device is in read-only mode + * @leb_size: logical eraseblock size + * @leb_start: starting offset of logical eraseblocks within physical +- * eraseblocks ++ * eraseblocks + * @ec_hdr_alsize: size of the EC header aligned to @hdrs_min_io_size + * @vid_hdr_alsize: size of the VID header aligned to @hdrs_min_io_size + * @vid_hdr_offset: starting offset of the volume identifier header (might be +- * unaligned) ++ * unaligned) + * @vid_hdr_aloffset: starting offset of the VID header aligned to + * @hdrs_min_io_size + * @vid_hdr_shift: contains @vid_hdr_offset - @vid_hdr_aloffset + * @bad_allowed: whether the MTD device admits of bad physical eraseblocks or +- * not ++ * not + * @mtd: MTD device descriptor + * + * @peb_buf1: a buffer of PEB size used for different purposes + * @peb_buf2: another buffer of PEB size used for different purposes +- * @buf_mutex: proptects @peb_buf1 and @peb_buf2 +- * @dbg_peb_buf: buffer of PEB size used for debugging +- * @dbg_buf_mutex: proptects @dbg_peb_buf ++ * @buf_mutex: protects @peb_buf1 and @peb_buf2 ++ * @ckvol_mutex: serializes static volume checking when opening ++ * @mult_mutex: serializes operations on multiple volumes, like re-naming ++ * @dbg_peb_buf: buffer of PEB size used for debugging ++ * @dbg_buf_mutex: protects @dbg_peb_buf + */ + struct ubi_device { + struct cdev cdev; + struct device dev; + int 
ubi_num; + char ubi_name[sizeof(UBI_NAME_STR)+5]; +- int major; + int vol_count; + struct ubi_volume *volumes[UBI_MAX_VOLUMES+UBI_INT_VOL_COUNT]; + spinlock_t volumes_lock; ++ int ref_count; + + int rsvd_pebs; + int avail_pebs; + int beb_rsvd_pebs; + int beb_rsvd_level; + ++ int autoresize_vol_id; + int vtbl_slots; + int vtbl_size; + struct ubi_vtbl_record *vtbl; +- struct mutex vtbl_mutex; ++ struct mutex volumes_mutex; + + int max_ec; ++ /* Note, mean_ec is not updated run-time - should be fixed */ + int mean_ec; + +- /* EBA unit's stuff */ ++ /* EBA sub-system's stuff */ + unsigned long long global_sqnum; + spinlock_t ltree_lock; + struct rb_root ltree; + struct mutex alc_mutex; + +- /* Wear-leveling unit's stuff */ ++ /* Wear-leveling sub-system's stuff */ + struct rb_root used; + struct rb_root free; + struct rb_root scrub; +- struct { +- struct rb_root pnum; +- struct rb_root aec; +- } prot; ++ struct list_head pq[UBI_PROT_QUEUE_LEN]; ++ int pq_head; + spinlock_t wl_lock; ++ struct mutex move_mutex; ++ struct rw_semaphore work_sem; + int wl_scheduled; + struct ubi_wl_entry **lookuptbl; +- unsigned long long abs_ec; + struct ubi_wl_entry *move_from; + struct ubi_wl_entry *move_to; +- int move_from_put; + int move_to_put; + struct list_head works; + int works_count; +@@ -333,7 +421,7 @@ + int thread_enabled; + char bgt_name[sizeof(UBI_BGT_NAME_PATTERN)+2]; + +- /* I/O unit's stuff */ ++ /* I/O sub-system's stuff */ + long long flash_size; + int peb_count; + int peb_size; +@@ -355,35 +443,49 @@ + void *peb_buf1; + void *peb_buf2; + struct mutex buf_mutex; ++ struct mutex ckvol_mutex; ++ struct mutex mult_mutex; + #ifdef CONFIG_MTD_UBI_DEBUG + void *dbg_peb_buf; + struct mutex dbg_buf_mutex; + #endif + }; + +-extern struct file_operations ubi_cdev_operations; +-extern struct file_operations ubi_vol_cdev_operations; ++extern struct kmem_cache *ubi_wl_entry_slab; ++extern const struct file_operations ubi_ctrl_cdev_operations; ++extern const struct file_operations 
ubi_cdev_operations; ++extern const struct file_operations ubi_vol_cdev_operations; + extern struct class *ubi_class; ++extern struct mutex ubi_devices_mutex; + + /* vtbl.c */ + int ubi_change_vtbl_record(struct ubi_device *ubi, int idx, + struct ubi_vtbl_record *vtbl_rec); ++int ubi_vtbl_rename_volumes(struct ubi_device *ubi, ++ struct list_head *rename_list); + int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_scan_info *si); + + /* vmt.c */ + int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req); +-int ubi_remove_volume(struct ubi_volume_desc *desc); ++int ubi_remove_volume(struct ubi_volume_desc *desc, int no_vtbl); + int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs); +-int ubi_add_volume(struct ubi_device *ubi, int vol_id); +-void ubi_free_volume(struct ubi_device *ubi, int vol_id); ++int ubi_rename_volumes(struct ubi_device *ubi, struct list_head *rename_list); ++int ubi_add_volume(struct ubi_device *ubi, struct ubi_volume *vol); ++void ubi_free_volume(struct ubi_device *ubi, struct ubi_volume *vol); + + /* upd.c */ +-int ubi_start_update(struct ubi_device *ubi, int vol_id, long long bytes); +-int ubi_more_update_data(struct ubi_device *ubi, int vol_id, ++int ubi_start_update(struct ubi_device *ubi, struct ubi_volume *vol, ++ long long bytes); ++int ubi_more_update_data(struct ubi_device *ubi, struct ubi_volume *vol, + const void __user *buf, int count); ++int ubi_start_leb_change(struct ubi_device *ubi, struct ubi_volume *vol, ++ const struct ubi_leb_change_req *req); ++int ubi_more_leb_change_data(struct ubi_device *ubi, struct ubi_volume *vol, ++ const void __user *buf, int count); + + /* misc.c */ +-int ubi_calc_data_len(const struct ubi_device *ubi, const void *buf, int length); ++int ubi_calc_data_len(const struct ubi_device *ubi, const void *buf, ++ int length); + int ubi_check_volume(struct ubi_device *ubi, int vol_id); + void ubi_calculate_reserved(struct ubi_device *ubi); + +@@ -399,20 +501,20 @@ + 
#endif + + /* eba.c */ +-int ubi_eba_unmap_leb(struct ubi_device *ubi, int vol_id, int lnum); +-int ubi_eba_read_leb(struct ubi_device *ubi, int vol_id, int lnum, void *buf, +- int offset, int len, int check); +-int ubi_eba_write_leb(struct ubi_device *ubi, int vol_id, int lnum, ++int ubi_eba_unmap_leb(struct ubi_device *ubi, struct ubi_volume *vol, ++ int lnum); ++int ubi_eba_read_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum, ++ void *buf, int offset, int len, int check); ++int ubi_eba_write_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum, + const void *buf, int offset, int len, int dtype); +-int ubi_eba_write_leb_st(struct ubi_device *ubi, int vol_id, int lnum, +- const void *buf, int len, int dtype, ++int ubi_eba_write_leb_st(struct ubi_device *ubi, struct ubi_volume *vol, ++ int lnum, const void *buf, int len, int dtype, + int used_ebs); +-int ubi_eba_atomic_leb_change(struct ubi_device *ubi, int vol_id, int lnum, +- const void *buf, int len, int dtype); ++int ubi_eba_atomic_leb_change(struct ubi_device *ubi, struct ubi_volume *vol, ++ int lnum, const void *buf, int len, int dtype); + int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, + struct ubi_vid_hdr *vid_hdr); + int ubi_eba_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si); +-void ubi_eba_close(const struct ubi_device *ubi); + + /* wl.c */ + int ubi_wl_get_peb(struct ubi_device *ubi, int dtype); +@@ -421,6 +523,7 @@ + int ubi_wl_scrub_peb(struct ubi_device *ubi, int pnum); + int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si); + void ubi_wl_close(struct ubi_device *ubi); ++int ubi_thread(void *u); + + /* io.c */ + int ubi_io_read(const struct ubi_device *ubi, void *buf, int pnum, int offset, +@@ -439,6 +542,14 @@ + int ubi_io_write_vid_hdr(struct ubi_device *ubi, int pnum, + struct ubi_vid_hdr *vid_hdr); + ++/* build.c */ ++int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset); ++int ubi_detach_mtd_dev(int 
ubi_num, int anyway); ++struct ubi_device *ubi_get_device(int ubi_num); ++void ubi_put_device(struct ubi_device *ubi); ++struct ubi_device *ubi_get_by_major(int major); ++int ubi_major2num(int major); ++ + /* + * ubi_rb_for_each_entry - walk an RB-tree. + * @rb: a pointer to type 'struct rb_node' to to use as a loop counter +@@ -523,8 +634,10 @@ + */ + static inline void ubi_ro_mode(struct ubi_device *ubi) + { +- ubi->ro_mode = 1; +- ubi_warn("switch to read-only mode"); ++ if (!ubi->ro_mode) { ++ ubi->ro_mode = 1; ++ ubi_warn("switch to read-only mode"); ++ } + } + + /** +diff -Nurd linux-2.6.24/drivers/mtd/ubi/upd.c ubifs-v2.6.24/drivers/mtd/ubi/upd.c +--- linux-2.6.24/drivers/mtd/ubi/upd.c 2008-01-25 00:58:37.000000000 +0200 ++++ ubifs-v2.6.24/drivers/mtd/ubi/upd.c 2009-04-07 17:14:47.000000000 +0200 +@@ -22,7 +22,8 @@ + */ + + /* +- * This file contains implementation of the volume update functionality. ++ * This file contains implementation of the volume update and atomic LEB change ++ * functionality. + * + * The update operation is based on the per-volume update marker which is + * stored in the volume table. The update marker is set before the update +@@ -38,36 +39,37 @@ + */ + + #include <linux/err.h> +-#include <asm/uaccess.h> +-#include <asm/div64.h> ++#include <linux/uaccess.h> + #include "ubi.h" + + /** + * set_update_marker - set update marker. + * @ubi: UBI device description object +- * @vol_id: volume ID ++ * @vol: volume description object + * +- * This function sets the update marker flag for volume @vol_id. Returns zero ++ * This function sets the update marker flag for volume @vol. Returns zero + * in case of success and a negative error code in case of failure. 
+ */ +-static int set_update_marker(struct ubi_device *ubi, int vol_id) ++static int set_update_marker(struct ubi_device *ubi, struct ubi_volume *vol) + { + int err; + struct ubi_vtbl_record vtbl_rec; +- struct ubi_volume *vol = ubi->volumes[vol_id]; + +- dbg_msg("set update marker for volume %d", vol_id); ++ dbg_gen("set update marker for volume %d", vol->vol_id); + + if (vol->upd_marker) { +- ubi_assert(ubi->vtbl[vol_id].upd_marker); +- dbg_msg("already set"); ++ ubi_assert(ubi->vtbl[vol->vol_id].upd_marker); ++ dbg_gen("already set"); + return 0; + } + +- memcpy(&vtbl_rec, &ubi->vtbl[vol_id], sizeof(struct ubi_vtbl_record)); ++ memcpy(&vtbl_rec, &ubi->vtbl[vol->vol_id], ++ sizeof(struct ubi_vtbl_record)); + vtbl_rec.upd_marker = 1; + +- err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec); ++ mutex_lock(&ubi->volumes_mutex); ++ err = ubi_change_vtbl_record(ubi, vol->vol_id, &vtbl_rec); ++ mutex_unlock(&ubi->volumes_mutex); + vol->upd_marker = 1; + return err; + } +@@ -75,38 +77,40 @@ + /** + * clear_update_marker - clear update marker. + * @ubi: UBI device description object +- * @vol_id: volume ID ++ * @vol: volume description object + * @bytes: new data size in bytes + * +- * This function clears the update marker for volume @vol_id, sets new volume ++ * This function clears the update marker for volume @vol, sets new volume + * data size and clears the "corrupted" flag (static volumes only). Returns + * zero in case of success and a negative error code in case of failure. 
+ */ +-static int clear_update_marker(struct ubi_device *ubi, int vol_id, long long bytes) ++static int clear_update_marker(struct ubi_device *ubi, struct ubi_volume *vol, ++ long long bytes) + { + int err; +- uint64_t tmp; + struct ubi_vtbl_record vtbl_rec; +- struct ubi_volume *vol = ubi->volumes[vol_id]; + +- dbg_msg("clear update marker for volume %d", vol_id); ++ dbg_gen("clear update marker for volume %d", vol->vol_id); + +- memcpy(&vtbl_rec, &ubi->vtbl[vol_id], sizeof(struct ubi_vtbl_record)); ++ memcpy(&vtbl_rec, &ubi->vtbl[vol->vol_id], ++ sizeof(struct ubi_vtbl_record)); + ubi_assert(vol->upd_marker && vtbl_rec.upd_marker); + vtbl_rec.upd_marker = 0; + + if (vol->vol_type == UBI_STATIC_VOLUME) { + vol->corrupted = 0; +- vol->used_bytes = tmp = bytes; +- vol->last_eb_bytes = do_div(tmp, vol->usable_leb_size); +- vol->used_ebs = tmp; ++ vol->used_bytes = bytes; ++ vol->used_ebs = div_u64_rem(bytes, vol->usable_leb_size, ++ &vol->last_eb_bytes); + if (vol->last_eb_bytes) + vol->used_ebs += 1; + else + vol->last_eb_bytes = vol->usable_leb_size; + } + +- err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec); ++ mutex_lock(&ubi->volumes_mutex); ++ err = ubi_change_vtbl_record(ubi, vol->vol_id, &vtbl_rec); ++ mutex_unlock(&ubi->volumes_mutex); + vol->upd_marker = 0; + return err; + } +@@ -114,35 +118,35 @@ + /** + * ubi_start_update - start volume update. + * @ubi: UBI device description object +- * @vol_id: volume ID ++ * @vol: volume description object + * @bytes: update bytes + * + * This function starts volume update operation. If @bytes is zero, the volume + * is just wiped out. Returns zero in case of success and a negative error code + * in case of failure. 
+ */ +-int ubi_start_update(struct ubi_device *ubi, int vol_id, long long bytes) ++int ubi_start_update(struct ubi_device *ubi, struct ubi_volume *vol, ++ long long bytes) + { + int i, err; +- uint64_t tmp; +- struct ubi_volume *vol = ubi->volumes[vol_id]; + +- dbg_msg("start update of volume %d, %llu bytes", vol_id, bytes); ++ dbg_gen("start update of volume %d, %llu bytes", vol->vol_id, bytes); ++ ubi_assert(!vol->updating && !vol->changing_leb); + vol->updating = 1; + +- err = set_update_marker(ubi, vol_id); ++ err = set_update_marker(ubi, vol); + if (err) + return err; + + /* Before updating - wipe out the volume */ + for (i = 0; i < vol->reserved_pebs; i++) { +- err = ubi_eba_unmap_leb(ubi, vol_id, i); ++ err = ubi_eba_unmap_leb(ubi, vol, i); + if (err) + return err; + } + + if (bytes == 0) { +- err = clear_update_marker(ubi, vol_id, 0); ++ err = clear_update_marker(ubi, vol, 0); + if (err) + return err; + err = ubi_wl_flush(ubi); +@@ -154,18 +158,50 @@ + if (!vol->upd_buf) + return -ENOMEM; + +- tmp = bytes; +- vol->upd_ebs = !!do_div(tmp, vol->usable_leb_size); +- vol->upd_ebs += tmp; ++ vol->upd_ebs = div_u64(bytes + vol->usable_leb_size - 1, ++ vol->usable_leb_size); + vol->upd_bytes = bytes; + vol->upd_received = 0; + return 0; + } + + /** ++ * ubi_start_leb_change - start atomic LEB change. ++ * @ubi: UBI device description object ++ * @vol: volume description object ++ * @req: operation request ++ * ++ * This function starts atomic LEB change operation. Returns zero in case of ++ * success and a negative error code in case of failure. 
++ */ ++int ubi_start_leb_change(struct ubi_device *ubi, struct ubi_volume *vol, ++ const struct ubi_leb_change_req *req) ++{ ++ ubi_assert(!vol->updating && !vol->changing_leb); ++ ++ dbg_gen("start changing LEB %d:%d, %u bytes", ++ vol->vol_id, req->lnum, req->bytes); ++ if (req->bytes == 0) ++ return ubi_eba_atomic_leb_change(ubi, vol, req->lnum, NULL, 0, ++ req->dtype); ++ ++ vol->upd_bytes = req->bytes; ++ vol->upd_received = 0; ++ vol->changing_leb = 1; ++ vol->ch_lnum = req->lnum; ++ vol->ch_dtype = req->dtype; ++ ++ vol->upd_buf = vmalloc(req->bytes); ++ if (!vol->upd_buf) ++ return -ENOMEM; ++ ++ return 0; ++} ++ ++/** + * write_leb - write update data. + * @ubi: UBI device description object +- * @vol_id: volume ID ++ * @vol: volume description object + * @lnum: logical eraseblock number + * @buf: data to write + * @len: data size +@@ -191,25 +227,22 @@ + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +-static int write_leb(struct ubi_device *ubi, int vol_id, int lnum, void *buf, +- int len, int used_ebs) ++static int write_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum, ++ void *buf, int len, int used_ebs) + { +- int err, l; +- struct ubi_volume *vol = ubi->volumes[vol_id]; ++ int err; + + if (vol->vol_type == UBI_DYNAMIC_VOLUME) { +- l = ALIGN(len, ubi->min_io_size); +- memset(buf + len, 0xFF, l - len); ++ int l = ALIGN(len, ubi->min_io_size); + +- l = ubi_calc_data_len(ubi, buf, l); +- if (l == 0) { +- dbg_msg("all %d bytes contain 0xFF - skip", len); ++ memset(buf + len, 0xFF, l - len); ++ len = ubi_calc_data_len(ubi, buf, l); ++ if (len == 0) { ++ dbg_gen("all %d bytes contain 0xFF - skip", len); + return 0; + } +- if (len != l) +- dbg_msg("skip last %d bytes (0xFF)", len - l); + +- err = ubi_eba_write_leb(ubi, vol_id, lnum, buf, 0, l, ++ err = ubi_eba_write_leb(ubi, vol, lnum, buf, 0, len, + UBI_UNKNOWN); + } else { + /* +@@ -222,7 +255,7 @@ + * contain zeros, not random trash. 
+ */ + memset(buf + len, 0, vol->usable_leb_size - len); +- err = ubi_eba_write_leb_st(ubi, vol_id, lnum, buf, len, ++ err = ubi_eba_write_leb_st(ubi, vol, lnum, buf, len, + UBI_UNKNOWN, used_ebs); + } + +@@ -231,33 +264,29 @@ + + /** + * ubi_more_update_data - write more update data. ++ * @ubi: UBI device description object + * @vol: volume description object + * @buf: write data (user-space memory buffer) + * @count: how much bytes to write + * + * This function writes more data to the volume which is being updated. It may +- * be called arbitrary number of times until all of the update data arrive. +- * This function returns %0 in case of success, number of bytes written during +- * the last call if the whole volume update was successfully finished, and a ++ * be called arbitrary number of times until all the update data arriveis. This ++ * function returns %0 in case of success, number of bytes written during the ++ * last call if the whole volume update has been successfully finished, and a + * negative error code in case of failure. + */ +-int ubi_more_update_data(struct ubi_device *ubi, int vol_id, ++int ubi_more_update_data(struct ubi_device *ubi, struct ubi_volume *vol, + const void __user *buf, int count) + { +- uint64_t tmp; +- struct ubi_volume *vol = ubi->volumes[vol_id]; + int lnum, offs, err = 0, len, to_write = count; + +- dbg_msg("write %d of %lld bytes, %lld already passed", ++ dbg_gen("write %d of %lld bytes, %lld already passed", + count, vol->upd_bytes, vol->upd_received); + + if (ubi->ro_mode) + return -EROFS; + +- tmp = vol->upd_received; +- offs = do_div(tmp, vol->usable_leb_size); +- lnum = tmp; +- ++ lnum = div_u64_rem(vol->upd_received, vol->usable_leb_size, &offs); + if (vol->upd_received + count > vol->upd_bytes) + to_write = count = vol->upd_bytes - vol->upd_received; + +@@ -290,8 +319,8 @@ + * is the last chunk, it's time to flush the buffer. 
+ */ + ubi_assert(flush_len <= vol->usable_leb_size); +- err = write_leb(ubi, vol_id, lnum, vol->upd_buf, +- flush_len, vol->upd_ebs); ++ err = write_leb(ubi, vol, lnum, vol->upd_buf, flush_len, ++ vol->upd_ebs); + if (err) + return err; + } +@@ -318,8 +347,8 @@ + + if (len == vol->usable_leb_size || + vol->upd_received + len == vol->upd_bytes) { +- err = write_leb(ubi, vol_id, lnum, vol->upd_buf, len, +- vol->upd_ebs); ++ err = write_leb(ubi, vol, lnum, vol->upd_buf, ++ len, vol->upd_ebs); + if (err) + break; + } +@@ -333,16 +362,72 @@ + ubi_assert(vol->upd_received <= vol->upd_bytes); + if (vol->upd_received == vol->upd_bytes) { + /* The update is finished, clear the update marker */ +- err = clear_update_marker(ubi, vol_id, vol->upd_bytes); ++ err = clear_update_marker(ubi, vol, vol->upd_bytes); + if (err) + return err; + err = ubi_wl_flush(ubi); + if (err == 0) { ++ vol->updating = 0; + err = to_write; + vfree(vol->upd_buf); +- vol->updating = 0; + } + } + + return err; + } ++ ++/** ++ * ubi_more_leb_change_data - accept more data for atomic LEB change. ++ * @ubi: UBI device description object ++ * @vol: volume description object ++ * @buf: write data (user-space memory buffer) ++ * @count: how much bytes to write ++ * ++ * This function accepts more data to the volume which is being under the ++ * "atomic LEB change" operation. It may be called arbitrary number of times ++ * until all data arrives. This function returns %0 in case of success, number ++ * of bytes written during the last call if the whole "atomic LEB change" ++ * operation has been successfully finished, and a negative error code in case ++ * of failure. 
++ */ ++int ubi_more_leb_change_data(struct ubi_device *ubi, struct ubi_volume *vol, ++ const void __user *buf, int count) ++{ ++ int err; ++ ++ dbg_gen("write %d of %lld bytes, %lld already passed", ++ count, vol->upd_bytes, vol->upd_received); ++ ++ if (ubi->ro_mode) ++ return -EROFS; ++ ++ if (vol->upd_received + count > vol->upd_bytes) ++ count = vol->upd_bytes - vol->upd_received; ++ ++ err = copy_from_user(vol->upd_buf + vol->upd_received, buf, count); ++ if (err) ++ return -EFAULT; ++ ++ vol->upd_received += count; ++ ++ if (vol->upd_received == vol->upd_bytes) { ++ int len = ALIGN((int)vol->upd_bytes, ubi->min_io_size); ++ ++ memset(vol->upd_buf + vol->upd_bytes, 0xFF, ++ len - vol->upd_bytes); ++ len = ubi_calc_data_len(ubi, vol->upd_buf, len); ++ err = ubi_eba_atomic_leb_change(ubi, vol, vol->ch_lnum, ++ vol->upd_buf, len, UBI_UNKNOWN); ++ if (err) ++ return err; ++ } ++ ++ ubi_assert(vol->upd_received <= vol->upd_bytes); ++ if (vol->upd_received == vol->upd_bytes) { ++ vol->changing_leb = 0; ++ err = count; ++ vfree(vol->upd_buf); ++ } ++ ++ return err; ++} +diff -Nurd linux-2.6.24/drivers/mtd/ubi/vmt.c ubifs-v2.6.24/drivers/mtd/ubi/vmt.c +--- linux-2.6.24/drivers/mtd/ubi/vmt.c 2008-01-25 00:58:37.000000000 +0200 ++++ ubifs-v2.6.24/drivers/mtd/ubi/vmt.c 2009-04-07 17:14:47.000000000 +0200 +@@ -24,13 +24,12 @@ + */ + + #include <linux/err.h> +-#include <asm/div64.h> + #include "ubi.h" + + #ifdef CONFIG_MTD_UBI_DEBUG_PARANOID +-static void paranoid_check_volumes(struct ubi_device *ubi); ++static int paranoid_check_volumes(struct ubi_device *ubi); + #else +-#define paranoid_check_volumes(ubi) ++#define paranoid_check_volumes(ubi) 0 + #endif + + static ssize_t vol_attribute_show(struct device *dev, +@@ -63,21 +62,30 @@ + * B. process 2 removes volume Y; + * C. process 1 starts reading the /<sysfs>/class/ubi/ubiX_Y/reserved_ebs file; + * +- * What we want to do in a situation like that is to return error when the file +- * is read. 
This is done by means of the 'removed' flag and the 'vol_lock' of +- * the UBI volume description object. ++ * In this situation, this function will return %-ENODEV because it will find ++ * out that the volume was removed from the @ubi->volumes array. + */ + static ssize_t vol_attribute_show(struct device *dev, + struct device_attribute *attr, char *buf) + { + int ret; + struct ubi_volume *vol = container_of(dev, struct ubi_volume, dev); ++ struct ubi_device *ubi; + +- spin_lock(&vol->ubi->volumes_lock); +- if (vol->removed) { +- spin_unlock(&vol->ubi->volumes_lock); ++ ubi = ubi_get_device(vol->ubi->ubi_num); ++ if (!ubi) ++ return -ENODEV; ++ ++ spin_lock(&ubi->volumes_lock); ++ if (!ubi->volumes[vol->vol_id]) { ++ spin_unlock(&ubi->volumes_lock); ++ ubi_put_device(ubi); + return -ENODEV; + } ++ /* Take a reference to prevent volume removal */ ++ vol->ref_count += 1; ++ spin_unlock(&ubi->volumes_lock); ++ + if (attr == &attr_vol_reserved_ebs) + ret = sprintf(buf, "%d\n", vol->reserved_pebs); + else if (attr == &attr_vol_type) { +@@ -94,15 +102,22 @@ + ret = sprintf(buf, "%d\n", vol->corrupted); + else if (attr == &attr_vol_alignment) + ret = sprintf(buf, "%d\n", vol->alignment); +- else if (attr == &attr_vol_usable_eb_size) { ++ else if (attr == &attr_vol_usable_eb_size) + ret = sprintf(buf, "%d\n", vol->usable_leb_size); +- } else if (attr == &attr_vol_data_bytes) ++ else if (attr == &attr_vol_data_bytes) + ret = sprintf(buf, "%lld\n", vol->used_bytes); + else if (attr == &attr_vol_upd_marker) + ret = sprintf(buf, "%d\n", vol->upd_marker); + else +- BUG(); +- spin_unlock(&vol->ubi->volumes_lock); ++ /* This must be a bug */ ++ ret = -EINVAL; ++ ++ /* We've done the operation, drop volume and UBI device references */ ++ spin_lock(&ubi->volumes_lock); ++ vol->ref_count -= 1; ++ ubi_assert(vol->ref_count >= 0); ++ spin_unlock(&ubi->volumes_lock); ++ ubi_put_device(ubi); + return ret; + } + +@@ -110,7 +125,8 @@ + static void vol_release(struct device *dev) + { + 
struct ubi_volume *vol = container_of(dev, struct ubi_volume, dev); +- ubi_assert(vol->removed); ++ ++ kfree(vol->eba_tbl); + kfree(vol); + } + +@@ -152,9 +168,7 @@ + if (err) + return err; + err = device_create_file(&vol->dev, &attr_vol_upd_marker); +- if (err) +- return err; +- return 0; ++ return err; + } + + /** +@@ -180,16 +194,17 @@ + * @req: volume creation request + * + * This function creates volume described by @req. If @req->vol_id id +- * %UBI_VOL_NUM_AUTO, this function automatically assigne ID to the new volume ++ * %UBI_VOL_NUM_AUTO, this function automatically assign ID to the new volume + * and saves it in @req->vol_id. Returns zero in case of success and a negative +- * error code in case of failure. ++ * error code in case of failure. Note, the caller has to have the ++ * @ubi->volumes_mutex locked. + */ + int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req) + { +- int i, err, vol_id = req->vol_id; ++ int i, err, vol_id = req->vol_id, do_free = 1; + struct ubi_volume *vol; + struct ubi_vtbl_record vtbl_rec; +- uint64_t bytes; ++ dev_t dev; + + if (ubi->ro_mode) + return -EROFS; +@@ -199,10 +214,9 @@ + return -ENOMEM; + + spin_lock(&ubi->volumes_lock); +- + if (vol_id == UBI_VOL_NUM_AUTO) { + /* Find unused volume ID */ +- dbg_msg("search for vacant volume ID"); ++ dbg_gen("search for vacant volume ID"); + for (i = 0; i < ubi->vtbl_slots; i++) + if (!ubi->volumes[i]) { + vol_id = i; +@@ -217,7 +231,7 @@ + req->vol_id = vol_id; + } + +- dbg_msg("volume ID %d, %llu bytes, type %d, name %s", ++ dbg_gen("volume ID %d, %llu bytes, type %d, name %s", + vol_id, (unsigned long long)req->bytes, + (int)req->vol_type, req->name); + +@@ -237,12 +251,10 @@ + goto out_unlock; + } + +- /* Calculate how many eraseblocks are requested */ ++ /* Calculate how many eraseblocks are requested */ + vol->usable_leb_size = ubi->leb_size - ubi->leb_size % req->alignment; +- bytes = req->bytes; +- if (do_div(bytes, vol->usable_leb_size)) +- 
vol->reserved_pebs = 1; +- vol->reserved_pebs += bytes; ++ vol->reserved_pebs += div_u64(req->bytes + vol->usable_leb_size - 1, ++ vol->usable_leb_size); + + /* Reserve physical eraseblocks */ + if (vol->reserved_pebs > ubi->avail_pebs) { +@@ -252,17 +264,15 @@ + } + ubi->avail_pebs -= vol->reserved_pebs; + ubi->rsvd_pebs += vol->reserved_pebs; ++ spin_unlock(&ubi->volumes_lock); + + vol->vol_id = vol_id; + vol->alignment = req->alignment; + vol->data_pad = ubi->leb_size % vol->alignment; + vol->vol_type = req->vol_type; + vol->name_len = req->name_len; +- memcpy(vol->name, req->name, vol->name_len + 1); +- vol->exclusive = 1; ++ memcpy(vol->name, req->name, vol->name_len); + vol->ubi = ubi; +- ubi->volumes[vol_id] = vol; +- spin_unlock(&ubi->volumes_lock); + + /* + * Finish all pending erases because there may be some LEBs belonging +@@ -287,10 +297,10 @@ + vol->used_bytes = + (long long)vol->used_ebs * vol->usable_leb_size; + } else { +- bytes = vol->used_bytes; +- vol->last_eb_bytes = do_div(bytes, vol->usable_leb_size); +- vol->used_ebs = bytes; +- if (vol->last_eb_bytes) ++ vol->used_ebs = div_u64_rem(vol->used_bytes, ++ vol->usable_leb_size, ++ &vol->last_eb_bytes); ++ if (vol->last_eb_bytes != 0) + vol->used_ebs += 1; + else + vol->last_eb_bytes = vol->usable_leb_size; +@@ -299,9 +309,10 @@ + /* Register character device for the volume */ + cdev_init(&vol->cdev, &ubi_vol_cdev_operations); + vol->cdev.owner = THIS_MODULE; +- err = cdev_add(&vol->cdev, MKDEV(ubi->major, vol_id + 1), 1); ++ dev = MKDEV(MAJOR(ubi->cdev.dev), vol_id + 1); ++ err = cdev_add(&vol->cdev, dev, 1); + if (err) { +- ubi_err("cannot add character device for volume %d", vol_id); ++ ubi_err("cannot add character device"); + goto out_mapping; + } + +@@ -311,12 +322,15 @@ + + vol->dev.release = vol_release; + vol->dev.parent = &ubi->dev; +- vol->dev.devt = MKDEV(ubi->major, vol->vol_id + 1); ++ vol->dev.devt = dev; + vol->dev.class = ubi_class; ++ + sprintf(&vol->dev.bus_id[0], "%s_%d", 
ubi->ubi_name, vol->vol_id); + err = device_register(&vol->dev); +- if (err) ++ if (err) { ++ ubi_err("cannot register device"); + goto out_gluebi; ++ } + + err = volume_sysfs_init(ubi, vol); + if (err) +@@ -332,98 +346,108 @@ + vtbl_rec.vol_type = UBI_VID_DYNAMIC; + else + vtbl_rec.vol_type = UBI_VID_STATIC; +- memcpy(vtbl_rec.name, vol->name, vol->name_len + 1); ++ memcpy(vtbl_rec.name, vol->name, vol->name_len); + + err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec); + if (err) + goto out_sysfs; + + spin_lock(&ubi->volumes_lock); ++ ubi->volumes[vol_id] = vol; + ubi->vol_count += 1; +- vol->exclusive = 0; + spin_unlock(&ubi->volumes_lock); + +- paranoid_check_volumes(ubi); +- return 0; ++ err = paranoid_check_volumes(ubi); ++ return err; + ++out_sysfs: ++ /* ++ * We have registered our device, we should not free the volume ++ * description object in this function in case of an error - it is ++ * freed by the release function. ++ * ++ * Get device reference to prevent the release function from being ++ * called just after sysfs has been closed. ++ */ ++ do_free = 0; ++ get_device(&vol->dev); ++ volume_sysfs_close(vol); + out_gluebi: +- err = ubi_destroy_gluebi(vol); ++ if (ubi_destroy_gluebi(vol)) ++ dbg_err("cannot destroy gluebi for volume %d:%d", ++ ubi->ubi_num, vol_id); + out_cdev: + cdev_del(&vol->cdev); + out_mapping: +- kfree(vol->eba_tbl); ++ if (do_free) ++ kfree(vol->eba_tbl); + out_acc: + spin_lock(&ubi->volumes_lock); + ubi->rsvd_pebs -= vol->reserved_pebs; + ubi->avail_pebs += vol->reserved_pebs; +- ubi->volumes[vol_id] = NULL; + out_unlock: + spin_unlock(&ubi->volumes_lock); +- kfree(vol); +- return err; +- +- /* +- * We are registered, so @vol is destroyed in the release function and +- * we have to de-initialize differently. 
+- */ +-out_sysfs: +- err = ubi_destroy_gluebi(vol); +- cdev_del(&vol->cdev); +- kfree(vol->eba_tbl); +- spin_lock(&ubi->volumes_lock); +- ubi->rsvd_pebs -= vol->reserved_pebs; +- ubi->avail_pebs += vol->reserved_pebs; +- ubi->volumes[vol_id] = NULL; +- spin_unlock(&ubi->volumes_lock); +- volume_sysfs_close(vol); ++ if (do_free) ++ kfree(vol); ++ else ++ put_device(&vol->dev); ++ ubi_err("cannot create volume %d, error %d", vol_id, err); + return err; + } + + /** + * ubi_remove_volume - remove volume. + * @desc: volume descriptor ++ * @no_vtbl: do not change volume table if not zero + * + * This function removes volume described by @desc. The volume has to be opened + * in "exclusive" mode. Returns zero in case of success and a negative error +- * code in case of failure. ++ * code in case of failure. The caller has to have the @ubi->volumes_mutex ++ * locked. + */ +-int ubi_remove_volume(struct ubi_volume_desc *desc) ++int ubi_remove_volume(struct ubi_volume_desc *desc, int no_vtbl) + { + struct ubi_volume *vol = desc->vol; + struct ubi_device *ubi = vol->ubi; + int i, err, vol_id = vol->vol_id, reserved_pebs = vol->reserved_pebs; + +- dbg_msg("remove UBI volume %d", vol_id); ++ dbg_gen("remove UBI volume %d", vol_id); + ubi_assert(desc->mode == UBI_EXCLUSIVE); + ubi_assert(vol == ubi->volumes[vol_id]); + + if (ubi->ro_mode) + return -EROFS; + ++ spin_lock(&ubi->volumes_lock); ++ if (vol->ref_count > 1) { ++ /* ++ * The volume is busy, probably someone is reading one of its ++ * sysfs files. 
++ */ ++ err = -EBUSY; ++ goto out_unlock; ++ } ++ ubi->volumes[vol_id] = NULL; ++ spin_unlock(&ubi->volumes_lock); ++ + err = ubi_destroy_gluebi(vol); + if (err) +- return err; ++ goto out_err; + +- err = ubi_change_vtbl_record(ubi, vol_id, NULL); +- if (err) +- return err; ++ if (!no_vtbl) { ++ err = ubi_change_vtbl_record(ubi, vol_id, NULL); ++ if (err) ++ goto out_err; ++ } + + for (i = 0; i < vol->reserved_pebs; i++) { +- err = ubi_eba_unmap_leb(ubi, vol_id, i); ++ err = ubi_eba_unmap_leb(ubi, vol, i); + if (err) +- return err; ++ goto out_err; + } + +- spin_lock(&ubi->volumes_lock); +- vol->removed = 1; +- ubi->volumes[vol_id] = NULL; +- spin_unlock(&ubi->volumes_lock); +- +- kfree(vol->eba_tbl); +- vol->eba_tbl = NULL; + cdev_del(&vol->cdev); + volume_sysfs_close(vol); +- kfree(desc); + + spin_lock(&ubi->volumes_lock); + ubi->rsvd_pebs -= reserved_pebs; +@@ -440,9 +464,17 @@ + ubi->vol_count -= 1; + spin_unlock(&ubi->volumes_lock); + +- paranoid_check_volumes(ubi); +- module_put(THIS_MODULE); +- return 0; ++ if (!no_vtbl) ++ err = paranoid_check_volumes(ubi); ++ return err; ++ ++out_err: ++ ubi_err("cannot remove volume %d, error %d", vol_id, err); ++ spin_lock(&ubi->volumes_lock); ++ ubi->volumes[vol_id] = vol; ++out_unlock: ++ spin_unlock(&ubi->volumes_lock); ++ return err; + } + + /** +@@ -450,8 +482,9 @@ + * @desc: volume descriptor + * @reserved_pebs: new size in physical eraseblocks + * +- * This function returns zero in case of success, and a negative error code in +- * case of failure. ++ * This function re-sizes the volume and returns zero in case of success, and a ++ * negative error code in case of failure. The caller has to have the ++ * @ubi->volumes_mutex locked. 
+ */ + int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs) + { +@@ -464,10 +497,8 @@ + if (ubi->ro_mode) + return -EROFS; + +- dbg_msg("re-size volume %d to from %d to %d PEBs", ++ dbg_gen("re-size volume %d to from %d to %d PEBs", + vol_id, vol->reserved_pebs, reserved_pebs); +- ubi_assert(desc->mode == UBI_EXCLUSIVE); +- ubi_assert(vol == ubi->volumes[vol_id]); + + if (vol->vol_type == UBI_STATIC_VOLUME && + reserved_pebs < vol->used_ebs) { +@@ -487,6 +518,14 @@ + for (i = 0; i < reserved_pebs; i++) + new_mapping[i] = UBI_LEB_UNMAPPED; + ++ spin_lock(&ubi->volumes_lock); ++ if (vol->ref_count > 1) { ++ spin_unlock(&ubi->volumes_lock); ++ err = -EBUSY; ++ goto out_free; ++ } ++ spin_unlock(&ubi->volumes_lock); ++ + /* Reserve physical eraseblocks */ + pebs = reserved_pebs - vol->reserved_pebs; + if (pebs > 0) { +@@ -516,7 +555,7 @@ + + if (pebs < 0) { + for (i = 0; i < -pebs; i++) { +- err = ubi_eba_unmap_leb(ubi, vol_id, reserved_pebs + i); ++ err = ubi_eba_unmap_leb(ubi, vol, reserved_pebs + i); + if (err) + goto out_acc; + } +@@ -547,8 +586,8 @@ + (long long)vol->used_ebs * vol->usable_leb_size; + } + +- paranoid_check_volumes(ubi); +- return 0; ++ err = paranoid_check_volumes(ubi); ++ return err; + + out_acc: + if (pebs > 0) { +@@ -563,29 +602,67 @@ + } + + /** ++ * ubi_rename_volumes - re-name UBI volumes. ++ * @ubi: UBI device description object ++ * @rename_list: list of &struct ubi_rename_entry objects ++ * ++ * This function re-names or removes volumes specified in the re-name list. ++ * Returns zero in case of success and a negative error code in case of ++ * failure. 
++ */ ++int ubi_rename_volumes(struct ubi_device *ubi, struct list_head *rename_list) ++{ ++ int err; ++ struct ubi_rename_entry *re; ++ ++ err = ubi_vtbl_rename_volumes(ubi, rename_list); ++ if (err) ++ return err; ++ ++ list_for_each_entry(re, rename_list, list) { ++ if (re->remove) { ++ err = ubi_remove_volume(re->desc, 1); ++ if (err) ++ break; ++ } else { ++ struct ubi_volume *vol = re->desc->vol; ++ ++ spin_lock(&ubi->volumes_lock); ++ vol->name_len = re->new_name_len; ++ memcpy(vol->name, re->new_name, re->new_name_len + 1); ++ spin_unlock(&ubi->volumes_lock); ++ } ++ } ++ ++ if (!err) ++ err = paranoid_check_volumes(ubi); ++ return err; ++} ++ ++/** + * ubi_add_volume - add volume. + * @ubi: UBI device description object +- * @vol_id: volume ID ++ * @vol: volume description object + * +- * This function adds an existin volume and initializes all its data +- * structures. Returnes zero in case of success and a negative error code in ++ * This function adds an existing volume and initializes all its data ++ * structures. Returns zero in case of success and a negative error code in + * case of failure. 
+ */ +-int ubi_add_volume(struct ubi_device *ubi, int vol_id) ++int ubi_add_volume(struct ubi_device *ubi, struct ubi_volume *vol) + { +- int err; +- struct ubi_volume *vol = ubi->volumes[vol_id]; ++ int err, vol_id = vol->vol_id; ++ dev_t dev; + +- dbg_msg("add volume %d", vol_id); +- ubi_dbg_dump_vol_info(vol); +- ubi_assert(vol); ++ dbg_gen("add volume %d", vol_id); + + /* Register character device for the volume */ + cdev_init(&vol->cdev, &ubi_vol_cdev_operations); + vol->cdev.owner = THIS_MODULE; +- err = cdev_add(&vol->cdev, MKDEV(ubi->major, vol->vol_id + 1), 1); ++ dev = MKDEV(MAJOR(ubi->cdev.dev), vol->vol_id + 1); ++ err = cdev_add(&vol->cdev, dev, 1); + if (err) { +- ubi_err("cannot add character device for volume %d", vol_id); ++ ubi_err("cannot add character device for volume %d, error %d", ++ vol_id, err); + return err; + } + +@@ -595,7 +672,7 @@ + + vol->dev.release = vol_release; + vol->dev.parent = &ubi->dev; +- vol->dev.devt = MKDEV(ubi->major, vol->vol_id + 1); ++ vol->dev.devt = dev; + vol->dev.class = ubi_class; + sprintf(&vol->dev.bus_id[0], "%s_%d", ubi->ubi_name, vol->vol_id); + err = device_register(&vol->dev); +@@ -610,8 +687,8 @@ + return err; + } + +- paranoid_check_volumes(ubi); +- return 0; ++ err = paranoid_check_volumes(ubi); ++ return err; + + out_gluebi: + err = ubi_destroy_gluebi(vol); +@@ -623,22 +700,19 @@ + /** + * ubi_free_volume - free volume. + * @ubi: UBI device description object +- * @vol_id: volume ID ++ * @vol: volume description object + * +- * This function frees all resources for volume @vol_id but does not remove it. ++ * This function frees all resources for volume @vol but does not remove it. + * Used only when the UBI device is detached. 
+ */ +-void ubi_free_volume(struct ubi_device *ubi, int vol_id) ++void ubi_free_volume(struct ubi_device *ubi, struct ubi_volume *vol) + { + int err; +- struct ubi_volume *vol = ubi->volumes[vol_id]; + +- dbg_msg("free volume %d", vol_id); +- ubi_assert(vol); ++ dbg_gen("free volume %d", vol->vol_id); + +- vol->removed = 1; ++ ubi->volumes[vol->vol_id] = NULL; + err = ubi_destroy_gluebi(vol); +- ubi->volumes[vol_id] = NULL; + cdev_del(&vol->cdev); + volume_sysfs_close(vol); + } +@@ -649,8 +723,10 @@ + * paranoid_check_volume - check volume information. + * @ubi: UBI device description object + * @vol_id: volume ID ++ * ++ * Returns zero if volume is all right and a a negative error code if not. + */ +-static void paranoid_check_volume(struct ubi_device *ubi, int vol_id) ++static int paranoid_check_volume(struct ubi_device *ubi, int vol_id) + { + int idx = vol_id2idx(ubi, vol_id); + int reserved_pebs, alignment, data_pad, vol_type, name_len, upd_marker; +@@ -668,16 +744,7 @@ + goto fail; + } + spin_unlock(&ubi->volumes_lock); +- return; +- } +- +- if (vol->exclusive) { +- /* +- * The volume may be being created at the moment, do not check +- * it (e.g., it may be in the middle of ubi_create_volume(). 
+- */ +- spin_unlock(&ubi->volumes_lock); +- return; ++ return 0; + } + + if (vol->reserved_pebs < 0 || vol->alignment < 0 || vol->data_pad < 0 || +@@ -690,7 +757,7 @@ + goto fail; + } + +- n = vol->alignment % ubi->min_io_size; ++ n = vol->alignment & (ubi->min_io_size - 1); + if (vol->alignment != 1 && n) { + ubi_err("alignment is not multiple of min I/O unit"); + goto fail; +@@ -708,11 +775,6 @@ + goto fail; + } + +- if (vol->upd_marker != 0 && vol->upd_marker != 1) { +- ubi_err("bad upd_marker"); +- goto fail; +- } +- + if (vol->upd_marker && vol->corrupted) { + dbg_err("update marker and corrupted simultaneously"); + goto fail; +@@ -747,7 +809,7 @@ + + n = (long long)vol->used_ebs * vol->usable_leb_size; + if (vol->vol_type == UBI_DYNAMIC_VOLUME) { +- if (vol->corrupted != 0) { ++ if (vol->corrupted) { + ubi_err("corrupted dynamic volume"); + goto fail; + } +@@ -764,10 +826,6 @@ + goto fail; + } + } else { +- if (vol->corrupted != 0 && vol->corrupted != 1) { +- ubi_err("bad corrupted"); +- goto fail; +- } + if (vol->used_ebs < 0 || vol->used_ebs > vol->reserved_pebs) { + ubi_err("bad used_ebs"); + goto fail; +@@ -796,33 +854,39 @@ + + if (alignment != vol->alignment || data_pad != vol->data_pad || + upd_marker != vol->upd_marker || vol_type != vol->vol_type || +- name_len!= vol->name_len || strncmp(name, vol->name, name_len)) { ++ name_len != vol->name_len || strncmp(name, vol->name, name_len)) { + ubi_err("volume info is different"); + goto fail; + } + + spin_unlock(&ubi->volumes_lock); +- return; ++ return 0; + + fail: + ubi_err("paranoid check failed for volume %d", vol_id); +- ubi_dbg_dump_vol_info(vol); ++ if (vol) ++ ubi_dbg_dump_vol_info(vol); + ubi_dbg_dump_vtbl_record(&ubi->vtbl[vol_id], vol_id); + spin_unlock(&ubi->volumes_lock); +- BUG(); ++ return -EINVAL; + } + + /** + * paranoid_check_volumes - check information about all volumes. 
+ * @ubi: UBI device description object ++ * ++ * Returns zero if volumes are all right and a a negative error code if not. + */ +-static void paranoid_check_volumes(struct ubi_device *ubi) ++static int paranoid_check_volumes(struct ubi_device *ubi) + { +- int i; ++ int i, err = 0; + +- mutex_lock(&ubi->vtbl_mutex); +- for (i = 0; i < ubi->vtbl_slots; i++) +- paranoid_check_volume(ubi, i); +- mutex_unlock(&ubi->vtbl_mutex); ++ for (i = 0; i < ubi->vtbl_slots; i++) { ++ err = paranoid_check_volume(ubi, i); ++ if (err) ++ break; ++ } ++ ++ return err; + } + #endif +diff -Nurd linux-2.6.24/drivers/mtd/ubi/vtbl.c ubifs-v2.6.24/drivers/mtd/ubi/vtbl.c +--- linux-2.6.24/drivers/mtd/ubi/vtbl.c 2008-01-25 00:58:37.000000000 +0200 ++++ ubifs-v2.6.24/drivers/mtd/ubi/vtbl.c 2009-04-07 17:14:47.000000000 +0200 +@@ -86,8 +86,10 @@ + { + int i, err; + uint32_t crc; ++ struct ubi_volume *layout_vol; + + ubi_assert(idx >= 0 && idx < ubi->vtbl_slots); ++ layout_vol = ubi->volumes[vol_id2idx(ubi, UBI_LAYOUT_VOLUME_ID)]; + + if (!vtbl_rec) + vtbl_rec = &empty_vtbl_record; +@@ -96,31 +98,75 @@ + vtbl_rec->crc = cpu_to_be32(crc); + } + +- mutex_lock(&ubi->vtbl_mutex); + memcpy(&ubi->vtbl[idx], vtbl_rec, sizeof(struct ubi_vtbl_record)); + for (i = 0; i < UBI_LAYOUT_VOLUME_EBS; i++) { +- err = ubi_eba_unmap_leb(ubi, UBI_LAYOUT_VOL_ID, i); +- if (err) { +- mutex_unlock(&ubi->vtbl_mutex); ++ err = ubi_eba_unmap_leb(ubi, layout_vol, i); ++ if (err) + return err; +- } +- err = ubi_eba_write_leb(ubi, UBI_LAYOUT_VOL_ID, i, ubi->vtbl, 0, ++ ++ err = ubi_eba_write_leb(ubi, layout_vol, i, ubi->vtbl, 0, + ubi->vtbl_size, UBI_LONGTERM); +- if (err) { +- mutex_unlock(&ubi->vtbl_mutex); ++ if (err) + return err; +- } + } + + paranoid_vtbl_check(ubi); +- mutex_unlock(&ubi->vtbl_mutex); +- return ubi_wl_flush(ubi); ++ return 0; + } + + /** +- * vol_til_check - check if volume table is not corrupted and contains sensible +- * data. ++ * ubi_vtbl_rename_volumes - rename UBI volumes in the volume table. 
++ * @ubi: UBI device description object ++ * @rename_list: list of &struct ubi_rename_entry objects + * ++ * This function re-names multiple volumes specified in @req in the volume ++ * table. Returns zero in case of success and a negative error code in case of ++ * failure. ++ */ ++int ubi_vtbl_rename_volumes(struct ubi_device *ubi, ++ struct list_head *rename_list) ++{ ++ int i, err; ++ struct ubi_rename_entry *re; ++ struct ubi_volume *layout_vol; ++ ++ list_for_each_entry(re, rename_list, list) { ++ uint32_t crc; ++ struct ubi_volume *vol = re->desc->vol; ++ struct ubi_vtbl_record *vtbl_rec = &ubi->vtbl[vol->vol_id]; ++ ++ if (re->remove) { ++ memcpy(vtbl_rec, &empty_vtbl_record, ++ sizeof(struct ubi_vtbl_record)); ++ continue; ++ } ++ ++ vtbl_rec->name_len = cpu_to_be16(re->new_name_len); ++ memcpy(vtbl_rec->name, re->new_name, re->new_name_len); ++ memset(vtbl_rec->name + re->new_name_len, 0, ++ UBI_VOL_NAME_MAX + 1 - re->new_name_len); ++ crc = crc32(UBI_CRC32_INIT, vtbl_rec, ++ UBI_VTBL_RECORD_SIZE_CRC); ++ vtbl_rec->crc = cpu_to_be32(crc); ++ } ++ ++ layout_vol = ubi->volumes[vol_id2idx(ubi, UBI_LAYOUT_VOLUME_ID)]; ++ for (i = 0; i < UBI_LAYOUT_VOLUME_EBS; i++) { ++ err = ubi_eba_unmap_leb(ubi, layout_vol, i); ++ if (err) ++ return err; ++ ++ err = ubi_eba_write_leb(ubi, layout_vol, i, ubi->vtbl, 0, ++ ubi->vtbl_size, UBI_LONGTERM); ++ if (err) ++ return err; ++ } ++ ++ return 0; ++} ++ ++/** ++ * vtbl_check - check if volume table is not corrupted and sensible. 
+ * @ubi: UBI device description object + * @vtbl: volume table + * +@@ -131,7 +177,7 @@ + const struct ubi_vtbl_record *vtbl) + { + int i, n, reserved_pebs, alignment, data_pad, vol_type, name_len; +- int upd_marker; ++ int upd_marker, err; + uint32_t crc; + const char *name; + +@@ -157,7 +203,7 @@ + if (reserved_pebs == 0) { + if (memcmp(&vtbl[i], &empty_vtbl_record, + UBI_VTBL_RECORD_SIZE)) { +- dbg_err("bad empty record"); ++ err = 2; + goto bad; + } + continue; +@@ -165,56 +211,57 @@ + + if (reserved_pebs < 0 || alignment < 0 || data_pad < 0 || + name_len < 0) { +- dbg_err("negative values"); ++ err = 3; + goto bad; + } + + if (alignment > ubi->leb_size || alignment == 0) { +- dbg_err("bad alignment"); ++ err = 4; + goto bad; + } + +- n = alignment % ubi->min_io_size; ++ n = alignment & (ubi->min_io_size - 1); + if (alignment != 1 && n) { +- dbg_err("alignment is not multiple of min I/O unit"); ++ err = 5; + goto bad; + } + + n = ubi->leb_size % alignment; + if (data_pad != n) { + dbg_err("bad data_pad, has to be %d", n); ++ err = 6; + goto bad; + } + + if (vol_type != UBI_VID_DYNAMIC && vol_type != UBI_VID_STATIC) { +- dbg_err("bad vol_type"); ++ err = 7; + goto bad; + } + + if (upd_marker != 0 && upd_marker != 1) { +- dbg_err("bad upd_marker"); ++ err = 8; + goto bad; + } + + if (reserved_pebs > ubi->good_peb_count) { +- dbg_err("too large reserved_pebs, good PEBs %d", +- ubi->good_peb_count); ++ dbg_err("too large reserved_pebs %d, good PEBs %d", ++ reserved_pebs, ubi->good_peb_count); ++ err = 9; + goto bad; + } + + if (name_len > UBI_VOL_NAME_MAX) { +- dbg_err("too long volume name, max %d", +- UBI_VOL_NAME_MAX); ++ err = 10; + goto bad; + } + + if (name[0] == '\0') { +- dbg_err("NULL volume name"); ++ err = 11; + goto bad; + } + + if (name_len != strnlen(name, name_len + 1)) { +- dbg_err("bad name_len"); ++ err = 12; + goto bad; + } + } +@@ -239,7 +286,7 @@ + return 0; + + bad: +- ubi_err("volume table check failed, record %d", i); ++ ubi_err("volume 
table check failed: record %d, error %d", i, err); + ubi_dbg_dump_vtbl_record(&vtbl[i], i); + return -EINVAL; + } +@@ -273,7 +320,7 @@ + * this volume table copy was found during scanning. It has to be wiped + * out. + */ +- sv = ubi_scan_find_sv(si, UBI_LAYOUT_VOL_ID); ++ sv = ubi_scan_find_sv(si, UBI_LAYOUT_VOLUME_ID); + if (sv) + old_seb = ubi_scan_find_seb(sv, copy); + +@@ -285,13 +332,12 @@ + } + + vid_hdr->vol_type = UBI_VID_DYNAMIC; +- vid_hdr->vol_id = cpu_to_be32(UBI_LAYOUT_VOL_ID); ++ vid_hdr->vol_id = cpu_to_be32(UBI_LAYOUT_VOLUME_ID); + vid_hdr->compat = UBI_LAYOUT_VOLUME_COMPAT; + vid_hdr->data_size = vid_hdr->used_ebs = + vid_hdr->data_pad = cpu_to_be32(0); + vid_hdr->lnum = cpu_to_be32(copy); + vid_hdr->sqnum = cpu_to_be64(++si->max_sqnum); +- vid_hdr->leb_ver = cpu_to_be32(old_seb ? old_seb->leb_ver + 1: 0); + + /* The EC header is already there, write the VID header */ + err = ubi_io_write_vid_hdr(ubi, new_seb->pnum, vid_hdr); +@@ -374,7 +420,7 @@ + * to LEB 0. + */ + +- dbg_msg("check layout volume"); ++ dbg_gen("check layout volume"); + + /* Read both LEB 0 and LEB 1 into memory */ + ubi_rb_for_each_entry(rb, seb, &sv->root, u.rb) { +@@ -388,7 +434,16 @@ + err = ubi_io_read_data(ubi, leb[seb->lnum], seb->pnum, 0, + ubi->vtbl_size); + if (err == UBI_IO_BITFLIPS || err == -EBADMSG) +- /* Scrub the PEB later */ ++ /* ++ * Scrub the PEB later. Note, -EBADMSG indicates an ++ * uncorrectable ECC error, but we have our own CRC and ++ * the data will be checked later. If the data is OK, ++ * the PEB will be scrubbed (because we set ++ * seb->scrub). If the data is not OK, the contents of ++ * the PEB will be recovered from the second copy, and ++ * seb->scrub will be cleared in ++ * 'ubi_scan_add_used()'. 
++ */ + seb->scrub = 1; + else if (err) + goto out_free; +@@ -404,7 +459,8 @@ + if (!leb_corrupted[0]) { + /* LEB 0 is OK */ + if (leb[1]) +- leb_corrupted[1] = memcmp(leb[0], leb[1], ubi->vtbl_size); ++ leb_corrupted[1] = memcmp(leb[0], leb[1], ++ ubi->vtbl_size); + if (leb_corrupted[1]) { + ubi_warn("volume table copy #2 is corrupted"); + err = create_vtbl(ubi, si, 1, leb[0]); +@@ -518,6 +574,17 @@ + vol->name[vol->name_len] = '\0'; + vol->vol_id = i; + ++ if (vtbl[i].flags & UBI_VTBL_AUTORESIZE_FLG) { ++ /* Auto re-size flag may be set only for one volume */ ++ if (ubi->autoresize_vol_id != -1) { ++ ubi_err("more then one auto-resize volume (%d " ++ "and %d)", ubi->autoresize_vol_id, i); ++ return -EINVAL; ++ } ++ ++ ubi->autoresize_vol_id = i; ++ } ++ + ubi_assert(!ubi->volumes[i]); + ubi->volumes[i] = vol; + ubi->vol_count += 1; +@@ -568,6 +635,7 @@ + vol->last_eb_bytes = sv->last_data_size; + } + ++ /* And add the layout volume */ + vol = kzalloc(sizeof(struct ubi_volume), GFP_KERNEL); + if (!vol) + return -ENOMEM; +@@ -582,7 +650,8 @@ + vol->last_eb_bytes = vol->reserved_pebs; + vol->used_bytes = + (long long)vol->used_ebs * (ubi->leb_size - vol->data_pad); +- vol->vol_id = UBI_LAYOUT_VOL_ID; ++ vol->vol_id = UBI_LAYOUT_VOLUME_ID; ++ vol->ref_count = 1; + + ubi_assert(!ubi->volumes[i]); + ubi->volumes[vol_id2idx(ubi, vol->vol_id)] = vol; +@@ -610,30 +679,32 @@ + static int check_sv(const struct ubi_volume *vol, + const struct ubi_scan_volume *sv) + { ++ int err; ++ + if (sv->highest_lnum >= vol->reserved_pebs) { +- dbg_err("bad highest_lnum"); ++ err = 1; + goto bad; + } + if (sv->leb_count > vol->reserved_pebs) { +- dbg_err("bad leb_count"); ++ err = 2; + goto bad; + } + if (sv->vol_type != vol->vol_type) { +- dbg_err("bad vol_type"); ++ err = 3; + goto bad; + } + if (sv->used_ebs > vol->reserved_pebs) { +- dbg_err("bad used_ebs"); ++ err = 4; + goto bad; + } + if (sv->data_pad != vol->data_pad) { +- dbg_err("bad data_pad"); ++ err = 5; + goto bad; + } + 
return 0; + + bad: +- ubi_err("bad scanning information"); ++ ubi_err("bad scanning information, error %d", err); + ubi_dbg_dump_sv(sv); + ubi_dbg_dump_vol_info(vol); + return -EINVAL; +@@ -662,14 +733,13 @@ + return -EINVAL; + } + +- if (si->highest_vol_id >= ubi->vtbl_slots + UBI_INT_VOL_COUNT&& ++ if (si->highest_vol_id >= ubi->vtbl_slots + UBI_INT_VOL_COUNT && + si->highest_vol_id < UBI_INTERNAL_VOL_START) { + ubi_err("too large volume ID %d found by scanning", + si->highest_vol_id); + return -EINVAL; + } + +- + for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) { + cond_resched(); + +@@ -707,8 +777,7 @@ + } + + /** +- * ubi_read_volume_table - read volume table. +- * information. ++ * ubi_read_volume_table - read the volume table. + * @ubi: UBI device description object + * @si: scanning information + * +@@ -734,7 +803,7 @@ + ubi->vtbl_size = ubi->vtbl_slots * UBI_VTBL_RECORD_SIZE; + ubi->vtbl_size = ALIGN(ubi->vtbl_size, ubi->min_io_size); + +- sv = ubi_scan_find_sv(si, UBI_LAYOUT_VOL_ID); ++ sv = ubi_scan_find_sv(si, UBI_LAYOUT_VOLUME_ID); + if (!sv) { + /* + * No logical eraseblocks belonging to the layout volume were +@@ -787,11 +856,10 @@ + + out_free: + vfree(ubi->vtbl); +- for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) +- if (ubi->volumes[i]) { +- kfree(ubi->volumes[i]); +- ubi->volumes[i] = NULL; +- } ++ for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) { ++ kfree(ubi->volumes[i]); ++ ubi->volumes[i] = NULL; ++ } + return err; + } + +diff -Nurd linux-2.6.24/drivers/mtd/ubi/wl.c ubifs-v2.6.24/drivers/mtd/ubi/wl.c +--- linux-2.6.24/drivers/mtd/ubi/wl.c 2008-01-25 00:58:37.000000000 +0200 ++++ ubifs-v2.6.24/drivers/mtd/ubi/wl.c 2009-04-07 17:14:47.000000000 +0200 +@@ -19,22 +19,22 @@ + */ + + /* +- * UBI wear-leveling unit. ++ * UBI wear-leveling sub-system. + * +- * This unit is responsible for wear-leveling. 
It works in terms of physical +- * eraseblocks and erase counters and knows nothing about logical eraseblocks, +- * volumes, etc. From this unit's perspective all physical eraseblocks are of +- * two types - used and free. Used physical eraseblocks are those that were +- * "get" by the 'ubi_wl_get_peb()' function, and free physical eraseblocks are +- * those that were put by the 'ubi_wl_put_peb()' function. ++ * This sub-system is responsible for wear-leveling. It works in terms of ++ * physical eraseblocks and erase counters and knows nothing about logical ++ * eraseblocks, volumes, etc. From this sub-system's perspective all physical ++ * eraseblocks are of two types - used and free. Used physical eraseblocks are ++ * those that were "get" by the 'ubi_wl_get_peb()' function, and free physical ++ * eraseblocks are those that were put by the 'ubi_wl_put_peb()' function. + * + * Physical eraseblocks returned by 'ubi_wl_get_peb()' have only erase counter +- * header. The rest of the physical eraseblock contains only 0xFF bytes. ++ * header. The rest of the physical eraseblock contains only %0xFF bytes. + * +- * When physical eraseblocks are returned to the WL unit by means of the ++ * When physical eraseblocks are returned to the WL sub-system by means of the + * 'ubi_wl_put_peb()' function, they are scheduled for erasure. The erasure is + * done asynchronously in context of the per-UBI device background thread, +- * which is also managed by the WL unit. ++ * which is also managed by the WL sub-system. + * + * The wear-leveling is ensured by means of moving the contents of used + * physical eraseblocks with low erase counter to free physical eraseblocks +@@ -43,34 +43,64 @@ + * The 'ubi_wl_get_peb()' function accepts data type hints which help to pick + * an "optimal" physical eraseblock. 
For example, when it is known that the + * physical eraseblock will be "put" soon because it contains short-term data, +- * the WL unit may pick a free physical eraseblock with low erase counter, and +- * so forth. ++ * the WL sub-system may pick a free physical eraseblock with low erase ++ * counter, and so forth. + * +- * If the WL unit fails to erase a physical eraseblock, it marks it as bad. ++ * If the WL sub-system fails to erase a physical eraseblock, it marks it as ++ * bad. + * +- * This unit is also responsible for scrubbing. If a bit-flip is detected in a +- * physical eraseblock, it has to be moved. Technically this is the same as +- * moving it for wear-leveling reasons. ++ * This sub-system is also responsible for scrubbing. If a bit-flip is detected ++ * in a physical eraseblock, it has to be moved. Technically this is the same ++ * as moving it for wear-leveling reasons. + * +- * As it was said, for the UBI unit all physical eraseblocks are either "free" +- * or "used". Free eraseblock are kept in the @wl->free RB-tree, while used +- * eraseblocks are kept in a set of different RB-trees: @wl->used, +- * @wl->prot.pnum, @wl->prot.aec, and @wl->scrub. ++ * As it was said, for the UBI sub-system all physical eraseblocks are either ++ * "free" or "used". Free eraseblock are kept in the @wl->free RB-tree, while ++ * used eraseblocks are kept in @wl->used or @wl->scrub RB-trees, or ++ * (temporarily) in the @wl->pq queue. ++ * ++ * When the WL sub-system returns a physical eraseblock, the physical ++ * eraseblock is protected from being moved for some "time". For this reason, ++ * the physical eraseblock is not directly moved from the @wl->free tree to the ++ * @wl->used tree. There is a protection queue in between where this ++ * physical eraseblock is temporarily stored (@wl->pq). 
++ * ++ * All this protection stuff is needed because: ++ * o we don't want to move physical eraseblocks just after we have given them ++ * to the user; instead, we first want to let users fill them up with data; ++ * ++ * o there is a chance that the user will put the physical eraseblock very ++ * soon, so it makes sense not to move it for some time, but wait; this is ++ * especially important in case of "short term" physical eraseblocks. ++ * ++ * Physical eraseblocks stay protected only for limited time. But the "time" is ++ * measured in erase cycles in this case. This is implemented with help of the ++ * protection queue. Eraseblocks are put to the tail of this queue when they ++ * are returned by the 'ubi_wl_get_peb()', and eraseblocks are removed from the ++ * head of the queue on each erase operation (for any eraseblock). So the ++ * length of the queue defines how may (global) erase cycles PEBs are protected. ++ * ++ * To put it differently, each physical eraseblock has 2 main states: free and ++ * used. The former state corresponds to the @wl->free tree. The latter state ++ * is split up on several sub-states: ++ * o the WL movement is allowed (@wl->used tree); ++ * o the WL movement is temporarily prohibited (@wl->pq queue); ++ * o scrubbing is needed (@wl->scrub tree). ++ * ++ * Depending on the sub-state, wear-leveling entries of the used physical ++ * eraseblocks may be kept in one of those structures. + * + * Note, in this implementation, we keep a small in-RAM object for each physical + * eraseblock. This is surely not a scalable solution. But it appears to be good + * enough for moderately large flashes and it is simple. In future, one may +- * re-work this unit and make it more scalable. ++ * re-work this sub-system and make it more scalable. + * +- * At the moment this unit does not utilize the sequence number, which was +- * introduced relatively recently. 
But it would be wise to do this because the +- * sequence number of a logical eraseblock characterizes how old is it. For ++ * At the moment this sub-system does not utilize the sequence number, which ++ * was introduced relatively recently. But it would be wise to do this because ++ * the sequence number of a logical eraseblock characterizes how old is it. For + * example, when we move a PEB with low erase counter, and we need to pick the + * target PEB, we pick a PEB with the highest EC if our PEB is "old" and we + * pick target PEB with an average EC if our PEB is not very "old". This is a +- * room for future re-works of the WL unit. +- * +- * FIXME: looks too complex, should be simplified (later). ++ * room for future re-works of the WL sub-system. + */ + + #include <linux/slab.h> +@@ -83,29 +113,22 @@ + #define WL_RESERVED_PEBS 1 + + /* +- * How many erase cycles are short term, unknown, and long term physical +- * eraseblocks protected. +- */ +-#define ST_PROTECTION 16 +-#define U_PROTECTION 10 +-#define LT_PROTECTION 4 +- +-/* + * Maximum difference between two erase counters. If this threshold is +- * exceeded, the WL unit starts moving data from used physical eraseblocks with +- * low erase counter to free physical eraseblocks with high erase counter. ++ * exceeded, the WL sub-system starts moving data from used physical ++ * eraseblocks with low erase counter to free physical eraseblocks with high ++ * erase counter. + */ + #define UBI_WL_THRESHOLD CONFIG_MTD_UBI_WL_THRESHOLD + + /* +- * When a physical eraseblock is moved, the WL unit has to pick the target ++ * When a physical eraseblock is moved, the WL sub-system has to pick the target + * physical eraseblock to move to. The simplest way would be just to pick the + * one with the highest erase counter. But in certain workloads this could lead + * to an unlimited wear of one or few physical eraseblock. 
Indeed, imagine a + * situation when the picked physical eraseblock is constantly erased after the + * data is written to it. So, we have a constant which limits the highest erase +- * counter of the free physical eraseblock to pick. Namely, the WL unit does +- * not pick eraseblocks with erase counter greater then the lowest erase ++ * counter of the free physical eraseblock to pick. Namely, the WL sub-system ++ * does not pick eraseblocks with erase counter greater then the lowest erase + * counter plus %WL_FREE_MAX_DIFF. + */ + #define WL_FREE_MAX_DIFF (2*UBI_WL_THRESHOLD) +@@ -117,80 +140,9 @@ + #define WL_MAX_FAILURES 32 + + /** +- * struct ubi_wl_entry - wear-leveling entry. +- * @rb: link in the corresponding RB-tree +- * @ec: erase counter +- * @pnum: physical eraseblock number +- * +- * Each physical eraseblock has a corresponding &struct wl_entry object which +- * may be kept in different RB-trees. +- */ +-struct ubi_wl_entry { +- struct rb_node rb; +- int ec; +- int pnum; +-}; +- +-/** +- * struct ubi_wl_prot_entry - PEB protection entry. +- * @rb_pnum: link in the @wl->prot.pnum RB-tree +- * @rb_aec: link in the @wl->prot.aec RB-tree +- * @abs_ec: the absolute erase counter value when the protection ends +- * @e: the wear-leveling entry of the physical eraseblock under protection +- * +- * When the WL unit returns a physical eraseblock, the physical eraseblock is +- * protected from being moved for some "time". For this reason, the physical +- * eraseblock is not directly moved from the @wl->free tree to the @wl->used +- * tree. There is one more tree in between where this physical eraseblock is +- * temporarily stored (@wl->prot). 
+- * +- * All this protection stuff is needed because: +- * o we don't want to move physical eraseblocks just after we have given them +- * to the user; instead, we first want to let users fill them up with data; +- * +- * o there is a chance that the user will put the physical eraseblock very +- * soon, so it makes sense not to move it for some time, but wait; this is +- * especially important in case of "short term" physical eraseblocks. +- * +- * Physical eraseblocks stay protected only for limited time. But the "time" is +- * measured in erase cycles in this case. This is implemented with help of the +- * absolute erase counter (@wl->abs_ec). When it reaches certain value, the +- * physical eraseblocks are moved from the protection trees (@wl->prot.*) to +- * the @wl->used tree. +- * +- * Protected physical eraseblocks are searched by physical eraseblock number +- * (when they are put) and by the absolute erase counter (to check if it is +- * time to move them to the @wl->used tree). So there are actually 2 RB-trees +- * storing the protected physical eraseblocks: @wl->prot.pnum and +- * @wl->prot.aec. They are referred to as the "protection" trees. The +- * first one is indexed by the physical eraseblock number. The second one is +- * indexed by the absolute erase counter. Both trees store +- * &struct ubi_wl_prot_entry objects. +- * +- * Each physical eraseblock has 2 main states: free and used. The former state +- * corresponds to the @wl->free tree. The latter state is split up on several +- * sub-states: +- * o the WL movement is allowed (@wl->used tree); +- * o the WL movement is temporarily prohibited (@wl->prot.pnum and +- * @wl->prot.aec trees); +- * o scrubbing is needed (@wl->scrub tree). +- * +- * Depending on the sub-state, wear-leveling entries of the used physical +- * eraseblocks may be kept in one of those trees. 
+- */ +-struct ubi_wl_prot_entry { +- struct rb_node rb_pnum; +- struct rb_node rb_aec; +- unsigned long long abs_ec; +- struct ubi_wl_entry *e; +-}; +- +-/** + * struct ubi_work - UBI work description data structure. + * @list: a link in the list of pending works + * @func: worker function +- * @priv: private data of the worker function +- * + * @e: physical eraseblock to erase + * @torture: if the physical eraseblock has to be tortured + * +@@ -211,14 +163,13 @@ + static int paranoid_check_ec(struct ubi_device *ubi, int pnum, int ec); + static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e, + struct rb_root *root); ++static int paranoid_check_in_pq(struct ubi_device *ubi, struct ubi_wl_entry *e); + #else + #define paranoid_check_ec(ubi, pnum, ec) 0 + #define paranoid_check_in_wl_tree(e, root) ++#define paranoid_check_in_pq(ubi, e) 0 + #endif + +-/* Slab cache for wear-leveling entries */ +-static struct kmem_cache *wl_entries_slab; +- + /** + * wl_tree_add - add a wear-leveling entry to a WL RB-tree. + * @e: the wear-leveling entry to add +@@ -236,7 +187,7 @@ + struct ubi_wl_entry *e1; + + parent = *p; +- e1 = rb_entry(parent, struct ubi_wl_entry, rb); ++ e1 = rb_entry(parent, struct ubi_wl_entry, u.rb); + + if (e->ec < e1->ec) + p = &(*p)->rb_left; +@@ -251,8 +202,8 @@ + } + } + +- rb_link_node(&e->rb, parent, p); +- rb_insert_color(&e->rb, root); ++ rb_link_node(&e->u.rb, parent, p); ++ rb_insert_color(&e->u.rb, root); + } + + /** +@@ -267,15 +218,26 @@ + int err; + struct ubi_work *wrk; + +- spin_lock(&ubi->wl_lock); ++ cond_resched(); + ++ /* ++ * @ubi->work_sem is used to synchronize with the workers. Workers take ++ * it in read mode, so many of them may be doing works at a time. But ++ * the queue flush code has to be sure the whole queue of works is ++ * done, and it takes the mutex in write mode. 
++ */ ++ down_read(&ubi->work_sem); ++ spin_lock(&ubi->wl_lock); + if (list_empty(&ubi->works)) { + spin_unlock(&ubi->wl_lock); ++ up_read(&ubi->work_sem); + return 0; + } + + wrk = list_entry(ubi->works.next, struct ubi_work, list); + list_del(&wrk->list); ++ ubi->works_count -= 1; ++ ubi_assert(ubi->works_count >= 0); + spin_unlock(&ubi->wl_lock); + + /* +@@ -286,11 +248,8 @@ + err = wrk->func(ubi, wrk, 0); + if (err) + ubi_err("work failed with error code %d", err); ++ up_read(&ubi->work_sem); + +- spin_lock(&ubi->wl_lock); +- ubi->works_count -= 1; +- ubi_assert(ubi->works_count >= 0); +- spin_unlock(&ubi->wl_lock); + return err; + } + +@@ -339,7 +298,7 @@ + while (p) { + struct ubi_wl_entry *e1; + +- e1 = rb_entry(p, struct ubi_wl_entry, rb); ++ e1 = rb_entry(p, struct ubi_wl_entry, u.rb); + + if (e->pnum == e1->pnum) { + ubi_assert(e == e1); +@@ -363,50 +322,24 @@ + } + + /** +- * prot_tree_add - add physical eraseblock to protection trees. ++ * prot_queue_add - add physical eraseblock to the protection queue. + * @ubi: UBI device description object + * @e: the physical eraseblock to add +- * @pe: protection entry object to use +- * @abs_ec: absolute erase counter value when this physical eraseblock has +- * to be removed from the protection trees. + * +- * @wl->lock has to be locked. ++ * This function adds @e to the tail of the protection queue @ubi->pq, where ++ * @e will stay for %UBI_PROT_QUEUE_LEN erase operations and will be ++ * temporarily protected from the wear-leveling worker. Note, @wl->lock has to ++ * be locked. 
+ */ +-static void prot_tree_add(struct ubi_device *ubi, struct ubi_wl_entry *e, +- struct ubi_wl_prot_entry *pe, int abs_ec) ++static void prot_queue_add(struct ubi_device *ubi, struct ubi_wl_entry *e) + { +- struct rb_node **p, *parent = NULL; +- struct ubi_wl_prot_entry *pe1; +- +- pe->e = e; +- pe->abs_ec = ubi->abs_ec + abs_ec; +- +- p = &ubi->prot.pnum.rb_node; +- while (*p) { +- parent = *p; +- pe1 = rb_entry(parent, struct ubi_wl_prot_entry, rb_pnum); +- +- if (e->pnum < pe1->e->pnum) +- p = &(*p)->rb_left; +- else +- p = &(*p)->rb_right; +- } +- rb_link_node(&pe->rb_pnum, parent, p); +- rb_insert_color(&pe->rb_pnum, &ubi->prot.pnum); +- +- p = &ubi->prot.aec.rb_node; +- parent = NULL; +- while (*p) { +- parent = *p; +- pe1 = rb_entry(parent, struct ubi_wl_prot_entry, rb_aec); ++ int pq_tail = ubi->pq_head - 1; + +- if (pe->abs_ec < pe1->abs_ec) +- p = &(*p)->rb_left; +- else +- p = &(*p)->rb_right; +- } +- rb_link_node(&pe->rb_aec, parent, p); +- rb_insert_color(&pe->rb_aec, &ubi->prot.aec); ++ if (pq_tail < 0) ++ pq_tail = UBI_PROT_QUEUE_LEN - 1; ++ ubi_assert(pq_tail >= 0 && pq_tail < UBI_PROT_QUEUE_LEN); ++ list_add_tail(&e->u.list, &ubi->pq[pq_tail]); ++ dbg_wl("added PEB %d EC %d to the protection queue", e->pnum, e->ec); + } + + /** +@@ -422,14 +355,14 @@ + struct rb_node *p; + struct ubi_wl_entry *e; + +- e = rb_entry(rb_first(root), struct ubi_wl_entry, rb); ++ e = rb_entry(rb_first(root), struct ubi_wl_entry, u.rb); + max += e->ec; + + p = root->rb_node; + while (p) { + struct ubi_wl_entry *e1; + +- e1 = rb_entry(p, struct ubi_wl_entry, rb); ++ e1 = rb_entry(p, struct ubi_wl_entry, u.rb); + if (e1->ec >= max) + p = p->rb_left; + else { +@@ -451,17 +384,12 @@ + */ + int ubi_wl_get_peb(struct ubi_device *ubi, int dtype) + { +- int err, protect, medium_ec; ++ int err, medium_ec; + struct ubi_wl_entry *e, *first, *last; +- struct ubi_wl_prot_entry *pe; + + ubi_assert(dtype == UBI_LONGTERM || dtype == UBI_SHORTTERM || + dtype == UBI_UNKNOWN); + +- pe = 
kmalloc(sizeof(struct ubi_wl_prot_entry), GFP_NOFS); +- if (!pe) +- return -ENOMEM; +- + retry: + spin_lock(&ubi->wl_lock); + if (!ubi->free.rb_node) { +@@ -469,110 +397,91 @@ + ubi_assert(list_empty(&ubi->works)); + ubi_err("no free eraseblocks"); + spin_unlock(&ubi->wl_lock); +- kfree(pe); + return -ENOSPC; + } + spin_unlock(&ubi->wl_lock); + + err = produce_free_peb(ubi); +- if (err < 0) { +- kfree(pe); ++ if (err < 0) + return err; +- } + goto retry; + } + + switch (dtype) { +- case UBI_LONGTERM: +- /* +- * For long term data we pick a physical eraseblock +- * with high erase counter. But the highest erase +- * counter we can pick is bounded by the the lowest +- * erase counter plus %WL_FREE_MAX_DIFF. +- */ +- e = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); +- protect = LT_PROTECTION; +- break; +- case UBI_UNKNOWN: +- /* +- * For unknown data we pick a physical eraseblock with +- * medium erase counter. But we by no means can pick a +- * physical eraseblock with erase counter greater or +- * equivalent than the lowest erase counter plus +- * %WL_FREE_MAX_DIFF. +- */ +- first = rb_entry(rb_first(&ubi->free), +- struct ubi_wl_entry, rb); +- last = rb_entry(rb_last(&ubi->free), +- struct ubi_wl_entry, rb); ++ case UBI_LONGTERM: ++ /* ++ * For long term data we pick a physical eraseblock with high ++ * erase counter. But the highest erase counter we can pick is ++ * bounded by the the lowest erase counter plus ++ * %WL_FREE_MAX_DIFF. ++ */ ++ e = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); ++ break; ++ case UBI_UNKNOWN: ++ /* ++ * For unknown data we pick a physical eraseblock with medium ++ * erase counter. But we by no means can pick a physical ++ * eraseblock with erase counter greater or equivalent than the ++ * lowest erase counter plus %WL_FREE_MAX_DIFF. 
++ */ ++ first = rb_entry(rb_first(&ubi->free), struct ubi_wl_entry, ++ u.rb); ++ last = rb_entry(rb_last(&ubi->free), struct ubi_wl_entry, u.rb); + +- if (last->ec - first->ec < WL_FREE_MAX_DIFF) +- e = rb_entry(ubi->free.rb_node, +- struct ubi_wl_entry, rb); +- else { +- medium_ec = (first->ec + WL_FREE_MAX_DIFF)/2; +- e = find_wl_entry(&ubi->free, medium_ec); +- } +- protect = U_PROTECTION; +- break; +- case UBI_SHORTTERM: +- /* +- * For short term data we pick a physical eraseblock +- * with the lowest erase counter as we expect it will +- * be erased soon. +- */ +- e = rb_entry(rb_first(&ubi->free), +- struct ubi_wl_entry, rb); +- protect = ST_PROTECTION; +- break; +- default: +- protect = 0; +- e = NULL; +- BUG(); ++ if (last->ec - first->ec < WL_FREE_MAX_DIFF) ++ e = rb_entry(ubi->free.rb_node, ++ struct ubi_wl_entry, u.rb); ++ else { ++ medium_ec = (first->ec + WL_FREE_MAX_DIFF)/2; ++ e = find_wl_entry(&ubi->free, medium_ec); ++ } ++ break; ++ case UBI_SHORTTERM: ++ /* ++ * For short term data we pick a physical eraseblock with the ++ * lowest erase counter as we expect it will be erased soon. ++ */ ++ e = rb_entry(rb_first(&ubi->free), struct ubi_wl_entry, u.rb); ++ break; ++ default: ++ BUG(); + } + ++ paranoid_check_in_wl_tree(e, &ubi->free); ++ + /* +- * Move the physical eraseblock to the protection trees where it will ++ * Move the physical eraseblock to the protection queue where it will + * be protected from being moved for some time. + */ +- paranoid_check_in_wl_tree(e, &ubi->free); +- rb_erase(&e->rb, &ubi->free); +- prot_tree_add(ubi, e, pe, protect); +- +- dbg_wl("PEB %d EC %d, protection %d", e->pnum, e->ec, protect); ++ rb_erase(&e->u.rb, &ubi->free); ++ dbg_wl("PEB %d EC %d", e->pnum, e->ec); ++ prot_queue_add(ubi, e); + spin_unlock(&ubi->wl_lock); +- + return e->pnum; + } + + /** +- * prot_tree_del - remove a physical eraseblock from the protection trees ++ * prot_queue_del - remove a physical eraseblock from the protection queue. 
+ * @ubi: UBI device description object + * @pnum: the physical eraseblock to remove ++ * ++ * This function deletes PEB @pnum from the protection queue and returns zero ++ * in case of success and %-ENODEV if the PEB was not found. + */ +-static void prot_tree_del(struct ubi_device *ubi, int pnum) ++static int prot_queue_del(struct ubi_device *ubi, int pnum) + { +- struct rb_node *p; +- struct ubi_wl_prot_entry *pe = NULL; +- +- p = ubi->prot.pnum.rb_node; +- while (p) { +- +- pe = rb_entry(p, struct ubi_wl_prot_entry, rb_pnum); ++ struct ubi_wl_entry *e; + +- if (pnum == pe->e->pnum) +- break; ++ e = ubi->lookuptbl[pnum]; ++ if (!e) ++ return -ENODEV; + +- if (pnum < pe->e->pnum) +- p = p->rb_left; +- else +- p = p->rb_right; +- } ++ if (paranoid_check_in_pq(ubi, e)) ++ return -ENODEV; + +- ubi_assert(pe->e->pnum == pnum); +- rb_erase(&pe->rb_aec, &ubi->prot.aec); +- rb_erase(&pe->rb_pnum, &ubi->prot.pnum); +- kfree(pe); ++ list_del(&e->u.list); ++ dbg_wl("deleted PEB %d from the protection queue", e->pnum); ++ return 0; + } + + /** +@@ -584,7 +493,8 @@ + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +-static int sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, int torture) ++static int sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, ++ int torture) + { + int err; + struct ubi_ec_hdr *ec_hdr; +@@ -636,48 +546,47 @@ + } + + /** +- * check_protection_over - check if it is time to stop protecting some +- * physical eraseblocks. ++ * serve_prot_queue - check if it is time to stop protecting PEBs. + * @ubi: UBI device description object + * +- * This function is called after each erase operation, when the absolute erase +- * counter is incremented, to check if some physical eraseblock have not to be +- * protected any longer. These physical eraseblocks are moved from the +- * protection trees to the used tree. 
++ * This function is called after each erase operation and removes PEBs from the ++ * tail of the protection queue. These PEBs have been protected for long enough ++ * and should be moved to the used tree. + */ +-static void check_protection_over(struct ubi_device *ubi) ++static void serve_prot_queue(struct ubi_device *ubi) + { +- struct ubi_wl_prot_entry *pe; ++ struct ubi_wl_entry *e, *tmp; ++ int count; + + /* + * There may be several protected physical eraseblock to remove, + * process them all. + */ +- while (1) { +- spin_lock(&ubi->wl_lock); +- if (!ubi->prot.aec.rb_node) { +- spin_unlock(&ubi->wl_lock); +- break; +- } +- +- pe = rb_entry(rb_first(&ubi->prot.aec), +- struct ubi_wl_prot_entry, rb_aec); ++repeat: ++ count = 0; ++ spin_lock(&ubi->wl_lock); ++ list_for_each_entry_safe(e, tmp, &ubi->pq[ubi->pq_head], u.list) { ++ dbg_wl("PEB %d EC %d protection over, move to used tree", ++ e->pnum, e->ec); + +- if (pe->abs_ec > ubi->abs_ec) { ++ list_del(&e->u.list); ++ wl_tree_add(e, &ubi->used); ++ if (count++ > 32) { ++ /* ++ * Let's be nice and avoid holding the spinlock for ++ * too long. ++ */ + spin_unlock(&ubi->wl_lock); +- break; ++ cond_resched(); ++ goto repeat; + } +- +- dbg_wl("PEB %d protection over, abs_ec %llu, PEB abs_ec %llu", +- pe->e->pnum, ubi->abs_ec, pe->abs_ec); +- rb_erase(&pe->rb_aec, &ubi->prot.aec); +- rb_erase(&pe->rb_pnum, &ubi->prot.pnum); +- wl_tree_add(pe->e, &ubi->used); +- spin_unlock(&ubi->wl_lock); +- +- kfree(pe); +- cond_resched(); + } ++ ++ ubi->pq_head += 1; ++ if (ubi->pq_head == UBI_PROT_QUEUE_LEN) ++ ubi->pq_head = 0; ++ ubi_assert(ubi->pq_head >= 0 && ubi->pq_head < UBI_PROT_QUEUE_LEN); ++ spin_unlock(&ubi->wl_lock); + } + + /** +@@ -685,8 +594,8 @@ + * @ubi: UBI device description object + * @wrk: the work to schedule + * +- * This function enqueues a work defined by @wrk to the tail of the pending +- * works list. ++ * This function adds a work defined by @wrk to the tail of the pending works ++ * list. 
+ */ + static void schedule_ubi_work(struct ubi_device *ubi, struct ubi_work *wrk) + { +@@ -744,12 +653,11 @@ + static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, + int cancel) + { +- int err, put = 0; ++ int err, scrubbing = 0, torture = 0; + struct ubi_wl_entry *e1, *e2; + struct ubi_vid_hdr *vid_hdr; + + kfree(wrk); +- + if (cancel) + return 0; + +@@ -757,21 +665,17 @@ + if (!vid_hdr) + return -ENOMEM; + ++ mutex_lock(&ubi->move_mutex); + spin_lock(&ubi->wl_lock); ++ ubi_assert(!ubi->move_from && !ubi->move_to); ++ ubi_assert(!ubi->move_to_put); + +- /* +- * Only one WL worker at a time is supported at this implementation, so +- * make sure a PEB is not being moved already. +- */ +- if (ubi->move_to || !ubi->free.rb_node || ++ if (!ubi->free.rb_node || + (!ubi->used.rb_node && !ubi->scrub.rb_node)) { + /* +- * Only one WL worker at a time is supported at this +- * implementation, so if a LEB is already being moved, cancel. +- * +- * No free physical eraseblocks? Well, we cancel wear-leveling +- * then. It will be triggered again when a free physical +- * eraseblock appears. ++ * No free physical eraseblocks? Well, they must be waiting in ++ * the queue to be erased. Cancel movement - it will be ++ * triggered again when a free physical eraseblock appears. + * + * No used physical eraseblocks? They must be temporarily + * protected from being moved. They will be moved to the +@@ -780,10 +684,7 @@ + */ + dbg_wl("cancel WL, a list is empty: free %d, used %d", + !ubi->free.rb_node, !ubi->used.rb_node); +- ubi->wl_scheduled = 0; +- spin_unlock(&ubi->wl_lock); +- ubi_free_vid_hdr(ubi, vid_hdr); +- return 0; ++ goto out_cancel; + } + + if (!ubi->scrub.rb_node) { +@@ -792,33 +693,30 @@ + * highly worn-out free physical eraseblock. If the erase + * counters differ much enough, start wear-leveling. 
+ */ +- e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, rb); ++ e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb); + e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); + + if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) { + dbg_wl("no WL needed: min used EC %d, max free EC %d", + e1->ec, e2->ec); +- ubi->wl_scheduled = 0; +- spin_unlock(&ubi->wl_lock); +- ubi_free_vid_hdr(ubi, vid_hdr); +- return 0; ++ goto out_cancel; + } + paranoid_check_in_wl_tree(e1, &ubi->used); +- rb_erase(&e1->rb, &ubi->used); ++ rb_erase(&e1->u.rb, &ubi->used); + dbg_wl("move PEB %d EC %d to PEB %d EC %d", + e1->pnum, e1->ec, e2->pnum, e2->ec); + } else { +- e1 = rb_entry(rb_first(&ubi->scrub), struct ubi_wl_entry, rb); ++ /* Perform scrubbing */ ++ scrubbing = 1; ++ e1 = rb_entry(rb_first(&ubi->scrub), struct ubi_wl_entry, u.rb); + e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); + paranoid_check_in_wl_tree(e1, &ubi->scrub); +- rb_erase(&e1->rb, &ubi->scrub); ++ rb_erase(&e1->u.rb, &ubi->scrub); + dbg_wl("scrub PEB %d to PEB %d", e1->pnum, e2->pnum); + } + + paranoid_check_in_wl_tree(e2, &ubi->free); +- rb_erase(&e2->rb, &ubi->free); +- ubi_assert(!ubi->move_from && !ubi->move_to); +- ubi_assert(!ubi->move_to_put && !ubi->move_from_put); ++ rb_erase(&e2->u.rb, &ubi->free); + ubi->move_from = e1; + ubi->move_to = e2; + spin_unlock(&ubi->wl_lock); +@@ -828,6 +726,10 @@ + * We so far do not know which logical eraseblock our physical + * eraseblock (@e1) belongs to. We have to read the volume identifier + * header first. ++ * ++ * Note, we are protected from this PEB being unmapped and erased. The ++ * 'ubi_wl_put_peb()' would wait for moving to be finished if the PEB ++ * which is being moved was unmapped. + */ + + err = ubi_io_read_vid_hdr(ubi, e1->pnum, vid_hdr, 0); +@@ -842,97 +744,145 @@ + * likely have the VID header in place. 
+ */ + dbg_wl("PEB %d has no VID header", e1->pnum); +- err = 0; +- } else { +- ubi_err("error %d while reading VID header from PEB %d", +- err, e1->pnum); +- if (err > 0) +- err = -EIO; ++ goto out_not_moved; + } +- goto error; ++ ++ ubi_err("error %d while reading VID header from PEB %d", ++ err, e1->pnum); ++ if (err > 0) ++ err = -EIO; ++ goto out_error; + } + + err = ubi_eba_copy_leb(ubi, e1->pnum, e2->pnum, vid_hdr); + if (err) { +- if (err == UBI_IO_BITFLIPS) +- err = 0; +- goto error; ++ if (err == -EAGAIN) ++ goto out_not_moved; ++ if (err < 0) ++ goto out_error; ++ if (err == 2) { ++ /* Target PEB write error, torture it */ ++ torture = 1; ++ goto out_not_moved; ++ } ++ ++ /* ++ * The LEB has not been moved because the volume is being ++ * deleted or the PEB has been put meanwhile. We should prevent ++ * this PEB from being selected for wear-leveling movement ++ * again, so put it to the protection queue. ++ */ ++ ++ dbg_wl("canceled moving PEB %d", e1->pnum); ++ ubi_assert(err == 1); ++ ++ ubi_free_vid_hdr(ubi, vid_hdr); ++ vid_hdr = NULL; ++ ++ spin_lock(&ubi->wl_lock); ++ prot_queue_add(ubi, e1); ++ ubi_assert(!ubi->move_to_put); ++ ubi->move_from = ubi->move_to = NULL; ++ ubi->wl_scheduled = 0; ++ spin_unlock(&ubi->wl_lock); ++ ++ e1 = NULL; ++ err = schedule_erase(ubi, e2, 0); ++ if (err) ++ goto out_error; ++ mutex_unlock(&ubi->move_mutex); ++ return 0; + } + ++ /* The PEB has been successfully moved */ + ubi_free_vid_hdr(ubi, vid_hdr); ++ vid_hdr = NULL; ++ if (scrubbing) ++ ubi_msg("scrubbed PEB %d, data moved to PEB %d", ++ e1->pnum, e2->pnum); ++ + spin_lock(&ubi->wl_lock); +- if (!ubi->move_to_put) ++ if (!ubi->move_to_put) { + wl_tree_add(e2, &ubi->used); +- else +- put = 1; ++ e2 = NULL; ++ } + ubi->move_from = ubi->move_to = NULL; +- ubi->move_from_put = ubi->move_to_put = 0; +- ubi->wl_scheduled = 0; ++ ubi->move_to_put = ubi->wl_scheduled = 0; + spin_unlock(&ubi->wl_lock); + +- if (put) { ++ err = schedule_erase(ubi, e1, 0); ++ if (err) { 
++ e1 = NULL; ++ goto out_error; ++ } ++ ++ if (e2) { + /* + * Well, the target PEB was put meanwhile, schedule it for + * erasure. + */ + dbg_wl("PEB %d was put meanwhile, erase", e2->pnum); + err = schedule_erase(ubi, e2, 0); +- if (err) { +- kmem_cache_free(wl_entries_slab, e2); +- ubi_ro_mode(ubi); +- } +- } +- +- err = schedule_erase(ubi, e1, 0); +- if (err) { +- kmem_cache_free(wl_entries_slab, e1); +- ubi_ro_mode(ubi); ++ if (err) ++ goto out_error; + } + + dbg_wl("done"); +- return err; ++ mutex_unlock(&ubi->move_mutex); ++ return 0; + + /* +- * Some error occurred. @e1 was not changed, so return it back. @e2 +- * might be changed, schedule it for erasure. ++ * For some reasons the LEB was not moved, might be an error, might be ++ * something else. @e1 was not changed, so return it back. @e2 might ++ * have been changed, schedule it for erasure. + */ +-error: +- if (err) +- dbg_wl("error %d occurred, cancel operation", err); +- ubi_assert(err <= 0); +- ++out_not_moved: ++ dbg_wl("canceled moving PEB %d", e1->pnum); + ubi_free_vid_hdr(ubi, vid_hdr); ++ vid_hdr = NULL; + spin_lock(&ubi->wl_lock); +- ubi->wl_scheduled = 0; +- if (ubi->move_from_put) +- put = 1; ++ if (scrubbing) ++ wl_tree_add(e1, &ubi->scrub); + else + wl_tree_add(e1, &ubi->used); ++ ubi_assert(!ubi->move_to_put); + ubi->move_from = ubi->move_to = NULL; +- ubi->move_from_put = ubi->move_to_put = 0; ++ ubi->wl_scheduled = 0; + spin_unlock(&ubi->wl_lock); + +- if (put) { +- /* +- * Well, the target PEB was put meanwhile, schedule it for +- * erasure. 
+- */ +- dbg_wl("PEB %d was put meanwhile, erase", e1->pnum); +- err = schedule_erase(ubi, e1, 0); +- if (err) { +- kmem_cache_free(wl_entries_slab, e1); +- ubi_ro_mode(ubi); +- } +- } ++ e1 = NULL; ++ err = schedule_erase(ubi, e2, torture); ++ if (err) ++ goto out_error; + +- err = schedule_erase(ubi, e2, 0); +- if (err) { +- kmem_cache_free(wl_entries_slab, e2); +- ubi_ro_mode(ubi); +- } ++ mutex_unlock(&ubi->move_mutex); ++ return 0; + +- yield(); ++out_error: ++ ubi_err("error %d while moving PEB %d to PEB %d", ++ err, e1->pnum, e2->pnum); ++ ++ ubi_free_vid_hdr(ubi, vid_hdr); ++ spin_lock(&ubi->wl_lock); ++ ubi->move_from = ubi->move_to = NULL; ++ ubi->move_to_put = ubi->wl_scheduled = 0; ++ spin_unlock(&ubi->wl_lock); ++ ++ if (e1) ++ kmem_cache_free(ubi_wl_entry_slab, e1); ++ if (e2) ++ kmem_cache_free(ubi_wl_entry_slab, e2); ++ ubi_ro_mode(ubi); ++ ++ mutex_unlock(&ubi->move_mutex); + return err; ++ ++out_cancel: ++ ubi->wl_scheduled = 0; ++ spin_unlock(&ubi->wl_lock); ++ mutex_unlock(&ubi->move_mutex); ++ ubi_free_vid_hdr(ubi, vid_hdr); ++ return 0; + } + + /** +@@ -970,7 +920,7 @@ + * erase counter of free physical eraseblocks is greater then + * %UBI_WL_THRESHOLD. + */ +- e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, rb); ++ e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb); + e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); + + if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) +@@ -1020,7 +970,7 @@ + if (cancel) { + dbg_wl("cancel erasure of PEB %d EC %d", pnum, e->ec); + kfree(wl_wrk); +- kmem_cache_free(wl_entries_slab, e); ++ kmem_cache_free(ubi_wl_entry_slab, e); + return 0; + } + +@@ -1032,15 +982,14 @@ + kfree(wl_wrk); + + spin_lock(&ubi->wl_lock); +- ubi->abs_ec += 1; + wl_tree_add(e, &ubi->free); + spin_unlock(&ubi->wl_lock); + + /* +- * One more erase operation has happened, take care about protected +- * physical eraseblocks. ++ * One more erase operation has happened, take care about ++ * protected physical eraseblocks. 
+ */ +- check_protection_over(ubi); ++ serve_prot_queue(ubi); + + /* And take care about wear-leveling */ + err = ensure_wear_leveling(ubi); +@@ -1049,7 +998,7 @@ + + ubi_err("failed to erase PEB %d, error %d", pnum, err); + kfree(wl_wrk); +- kmem_cache_free(wl_entries_slab, e); ++ kmem_cache_free(ubi_wl_entry_slab, e); + + if (err == -EINTR || err == -ENOMEM || err == -EAGAIN || + err == -EBUSY) { +@@ -1119,8 +1068,7 @@ + } + + /** +- * ubi_wl_put_peb - return a physical eraseblock to the wear-leveling +- * unit. ++ * ubi_wl_put_peb - return a PEB to the wear-leveling sub-system. + * @ubi: UBI device description object + * @pnum: physical eraseblock to return + * @torture: if this physical eraseblock has to be tortured +@@ -1128,7 +1076,7 @@ + * This function is called to return physical eraseblock @pnum to the pool of + * free physical eraseblocks. The @torture flag has to be set if an I/O error + * occurred to this @pnum and it has to be tested. This function returns zero +- * in case of success and a negative error code in case of failure. ++ * in case of success, and a negative error code in case of failure. + */ + int ubi_wl_put_peb(struct ubi_device *ubi, int pnum, int torture) + { +@@ -1139,8 +1087,8 @@ + ubi_assert(pnum >= 0); + ubi_assert(pnum < ubi->peb_count); + ++retry: + spin_lock(&ubi->wl_lock); +- + e = ubi->lookuptbl[pnum]; + if (e == ubi->move_from) { + /* +@@ -1148,17 +1096,22 @@ + * be moved. It will be scheduled for erasure in the + * wear-leveling worker. + */ +- dbg_wl("PEB %d is being moved", pnum); +- ubi_assert(!ubi->move_from_put); +- ubi->move_from_put = 1; ++ dbg_wl("PEB %d is being moved, wait", pnum); + spin_unlock(&ubi->wl_lock); +- return 0; ++ ++ /* Wait for the WL worker by taking the @ubi->move_mutex */ ++ mutex_lock(&ubi->move_mutex); ++ mutex_unlock(&ubi->move_mutex); ++ goto retry; + } else if (e == ubi->move_to) { + /* + * User is putting the physical eraseblock which was selected + * as the target the data is moved to. 
It may happen if the EBA +- * unit already re-mapped the LEB but the WL unit did has not +- * put the PEB to the "used" tree. ++ * sub-system already re-mapped the LEB in 'ubi_eba_copy_leb()' ++ * but the WL sub-system has not put the PEB to the "used" tree ++ * yet, but it is about to do this. So we just set a flag which ++ * will tell the WL worker that the PEB is not needed anymore ++ * and should be scheduled for erasure. + */ + dbg_wl("PEB %d is the target of data moving", pnum); + ubi_assert(!ubi->move_to_put); +@@ -1168,12 +1121,19 @@ + } else { + if (in_wl_tree(e, &ubi->used)) { + paranoid_check_in_wl_tree(e, &ubi->used); +- rb_erase(&e->rb, &ubi->used); ++ rb_erase(&e->u.rb, &ubi->used); + } else if (in_wl_tree(e, &ubi->scrub)) { + paranoid_check_in_wl_tree(e, &ubi->scrub); +- rb_erase(&e->rb, &ubi->scrub); +- } else +- prot_tree_del(ubi, e->pnum); ++ rb_erase(&e->u.rb, &ubi->scrub); ++ } else { ++ err = prot_queue_del(ubi, e->pnum); ++ if (err) { ++ ubi_err("PEB %d not found", pnum); ++ ubi_ro_mode(ubi); ++ spin_unlock(&ubi->wl_lock); ++ return err; ++ } ++ } + } + spin_unlock(&ubi->wl_lock); + +@@ -1201,7 +1161,7 @@ + { + struct ubi_wl_entry *e; + +- ubi_msg("schedule PEB %d for scrubbing", pnum); ++ dbg_msg("schedule PEB %d for scrubbing", pnum); + + retry: + spin_lock(&ubi->wl_lock); +@@ -1226,9 +1186,18 @@ + + if (in_wl_tree(e, &ubi->used)) { + paranoid_check_in_wl_tree(e, &ubi->used); +- rb_erase(&e->rb, &ubi->used); +- } else +- prot_tree_del(ubi, pnum); ++ rb_erase(&e->u.rb, &ubi->used); ++ } else { ++ int err; ++ ++ err = prot_queue_del(ubi, e->pnum); ++ if (err) { ++ ubi_err("PEB %d not found", pnum); ++ ubi_ro_mode(ubi); ++ spin_unlock(&ubi->wl_lock); ++ return err; ++ } ++ } + + wl_tree_add(e, &ubi->scrub); + spin_unlock(&ubi->wl_lock); +@@ -1249,17 +1218,32 @@ + */ + int ubi_wl_flush(struct ubi_device *ubi) + { +- int err, pending_count; ++ int err; + +- pending_count = ubi->works_count; ++ /* ++ * Erase while the pending works queue is not 
empty, but not more than ++ * the number of currently pending works. ++ */ ++ dbg_wl("flush (%d pending works)", ubi->works_count); ++ while (ubi->works_count) { ++ err = do_work(ubi); ++ if (err) ++ return err; ++ } + +- dbg_wl("flush (%d pending works)", pending_count); ++ /* ++ * Make sure all the works which have been done in parallel are ++ * finished. ++ */ ++ down_write(&ubi->work_sem); ++ up_write(&ubi->work_sem); + + /* +- * Erase while the pending works queue is not empty, but not more then +- * the number of currently pending works. ++ * And in case last was the WL worker and it canceled the LEB ++ * movement, flush again. + */ +- while (pending_count-- > 0) { ++ while (ubi->works_count) { ++ dbg_wl("flush more (%d pending works)", ubi->works_count); + err = do_work(ubi); + if (err) + return err; +@@ -1284,17 +1268,17 @@ + else if (rb->rb_right) + rb = rb->rb_right; + else { +- e = rb_entry(rb, struct ubi_wl_entry, rb); ++ e = rb_entry(rb, struct ubi_wl_entry, u.rb); + + rb = rb_parent(rb); + if (rb) { +- if (rb->rb_left == &e->rb) ++ if (rb->rb_left == &e->u.rb) + rb->rb_left = NULL; + else + rb->rb_right = NULL; + } + +- kmem_cache_free(wl_entries_slab, e); ++ kmem_cache_free(ubi_wl_entry_slab, e); + } + } + } +@@ -1303,7 +1287,7 @@ + * ubi_thread - UBI background thread. 
+ * @u: the UBI device description object pointer + */ +-static int ubi_thread(void *u) ++int ubi_thread(void *u) + { + int failures = 0; + struct ubi_device *ubi = u; +@@ -1316,7 +1300,7 @@ + int err; + + if (kthread_should_stop()) +- goto out; ++ break; + + if (try_to_freeze()) + continue; +@@ -1343,7 +1327,8 @@ + ubi_msg("%s: %d consecutive failures", + ubi->bgt_name, WL_MAX_FAILURES); + ubi_ro_mode(ubi); +- break; ++ ubi->thread_enabled = 0; ++ continue; + } + } else + failures = 0; +@@ -1351,7 +1336,6 @@ + cond_resched(); + } + +-out: + dbg_wl("background thread \"%s\" is killed", ubi->bgt_name); + return 0; + } +@@ -1374,8 +1358,7 @@ + } + + /** +- * ubi_wl_init_scan - initialize the wear-leveling unit using scanning +- * information. ++ * ubi_wl_init_scan - initialize the WL sub-system using scanning information. + * @ubi: UBI device description object + * @si: scanning information + * +@@ -1384,46 +1367,34 @@ + */ + int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si) + { +- int err; ++ int err, i; + struct rb_node *rb1, *rb2; + struct ubi_scan_volume *sv; + struct ubi_scan_leb *seb, *tmp; + struct ubi_wl_entry *e; + +- + ubi->used = ubi->free = ubi->scrub = RB_ROOT; +- ubi->prot.pnum = ubi->prot.aec = RB_ROOT; + spin_lock_init(&ubi->wl_lock); ++ mutex_init(&ubi->move_mutex); ++ init_rwsem(&ubi->work_sem); + ubi->max_ec = si->max_ec; + INIT_LIST_HEAD(&ubi->works); + + sprintf(ubi->bgt_name, UBI_BGT_NAME_PATTERN, ubi->ubi_num); + +- ubi->bgt_thread = kthread_create(ubi_thread, ubi, ubi->bgt_name); +- if (IS_ERR(ubi->bgt_thread)) { +- err = PTR_ERR(ubi->bgt_thread); +- ubi_err("cannot spawn \"%s\", error %d", ubi->bgt_name, +- err); +- return err; +- } +- +- if (ubi_devices_cnt == 0) { +- wl_entries_slab = kmem_cache_create("ubi_wl_entry_slab", +- sizeof(struct ubi_wl_entry), +- 0, 0, NULL); +- if (!wl_entries_slab) +- return -ENOMEM; +- } +- + err = -ENOMEM; + ubi->lookuptbl = kzalloc(ubi->peb_count * sizeof(void *), GFP_KERNEL); + if 
(!ubi->lookuptbl) +- goto out_free; ++ return err; ++ ++ for (i = 0; i < UBI_PROT_QUEUE_LEN; i++) ++ INIT_LIST_HEAD(&ubi->pq[i]); ++ ubi->pq_head = 0; + + list_for_each_entry_safe(seb, tmp, &si->erase, u.list) { + cond_resched(); + +- e = kmem_cache_alloc(wl_entries_slab, GFP_KERNEL); ++ e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); + if (!e) + goto out_free; + +@@ -1431,7 +1402,7 @@ + e->ec = seb->ec; + ubi->lookuptbl[e->pnum] = e; + if (schedule_erase(ubi, e, 0)) { +- kmem_cache_free(wl_entries_slab, e); ++ kmem_cache_free(ubi_wl_entry_slab, e); + goto out_free; + } + } +@@ -1439,7 +1410,7 @@ + list_for_each_entry(seb, &si->free, u.list) { + cond_resched(); + +- e = kmem_cache_alloc(wl_entries_slab, GFP_KERNEL); ++ e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); + if (!e) + goto out_free; + +@@ -1453,7 +1424,7 @@ + list_for_each_entry(seb, &si->corr, u.list) { + cond_resched(); + +- e = kmem_cache_alloc(wl_entries_slab, GFP_KERNEL); ++ e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); + if (!e) + goto out_free; + +@@ -1461,7 +1432,7 @@ + e->ec = seb->ec; + ubi->lookuptbl[e->pnum] = e; + if (schedule_erase(ubi, e, 0)) { +- kmem_cache_free(wl_entries_slab, e); ++ kmem_cache_free(ubi_wl_entry_slab, e); + goto out_free; + } + } +@@ -1470,7 +1441,7 @@ + ubi_rb_for_each_entry(rb2, seb, &sv->root, u.rb) { + cond_resched(); + +- e = kmem_cache_alloc(wl_entries_slab, GFP_KERNEL); ++ e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); + if (!e) + goto out_free; + +@@ -1510,70 +1481,45 @@ + tree_destroy(&ubi->free); + tree_destroy(&ubi->scrub); + kfree(ubi->lookuptbl); +- if (ubi_devices_cnt == 0) +- kmem_cache_destroy(wl_entries_slab); + return err; + } + + /** +- * protection_trees_destroy - destroy the protection RB-trees. ++ * protection_queue_destroy - destroy the protection queue. 
+ * @ubi: UBI device description object + */ +-static void protection_trees_destroy(struct ubi_device *ubi) ++static void protection_queue_destroy(struct ubi_device *ubi) + { +- struct rb_node *rb; +- struct ubi_wl_prot_entry *pe; +- +- rb = ubi->prot.aec.rb_node; +- while (rb) { +- if (rb->rb_left) +- rb = rb->rb_left; +- else if (rb->rb_right) +- rb = rb->rb_right; +- else { +- pe = rb_entry(rb, struct ubi_wl_prot_entry, rb_aec); +- +- rb = rb_parent(rb); +- if (rb) { +- if (rb->rb_left == &pe->rb_aec) +- rb->rb_left = NULL; +- else +- rb->rb_right = NULL; +- } ++ int i; ++ struct ubi_wl_entry *e, *tmp; + +- kmem_cache_free(wl_entries_slab, pe->e); +- kfree(pe); ++ for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i) { ++ list_for_each_entry_safe(e, tmp, &ubi->pq[i], u.list) { ++ list_del(&e->u.list); ++ kmem_cache_free(ubi_wl_entry_slab, e); + } + } + } + + /** +- * ubi_wl_close - close the wear-leveling unit. ++ * ubi_wl_close - close the wear-leveling sub-system. + * @ubi: UBI device description object + */ + void ubi_wl_close(struct ubi_device *ubi) + { +- dbg_wl("disable \"%s\"", ubi->bgt_name); +- if (ubi->bgt_thread) +- kthread_stop(ubi->bgt_thread); +- +- dbg_wl("close the UBI wear-leveling unit"); +- ++ dbg_wl("close the WL sub-system"); + cancel_pending(ubi); +- protection_trees_destroy(ubi); ++ protection_queue_destroy(ubi); + tree_destroy(&ubi->used); + tree_destroy(&ubi->free); + tree_destroy(&ubi->scrub); + kfree(ubi->lookuptbl); +- if (ubi_devices_cnt == 1) +- kmem_cache_destroy(wl_entries_slab); + } + + #ifdef CONFIG_MTD_UBI_DEBUG_PARANOID + + /** +- * paranoid_check_ec - make sure that the erase counter of a physical eraseblock +- * is correct. ++ * paranoid_check_ec - make sure that the erase counter of a PEB is correct. 
+ * @ubi: UBI device description object + * @pnum: the physical eraseblock number to check + * @ec: the erase counter to check +@@ -1614,13 +1560,12 @@ + } + + /** +- * paranoid_check_in_wl_tree - make sure that a wear-leveling entry is present +- * in a WL RB-tree. ++ * paranoid_check_in_wl_tree - check that wear-leveling entry is in WL RB-tree. + * @e: the wear-leveling entry to check + * @root: the root of the tree + * +- * This function returns zero if @e is in the @root RB-tree and %1 if it +- * is not. ++ * This function returns zero if @e is in the @root RB-tree and %1 if it is ++ * not. + */ + static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e, + struct rb_root *root) +@@ -1634,4 +1579,27 @@ + return 1; + } + ++/** ++ * paranoid_check_in_pq - check if wear-leveling entry is in the protection ++ * queue. ++ * @ubi: UBI device description object ++ * @e: the wear-leveling entry to check ++ * ++ * This function returns zero if @e is in @ubi->pq and %1 if it is not. ++ */ ++static int paranoid_check_in_pq(struct ubi_device *ubi, struct ubi_wl_entry *e) ++{ ++ struct ubi_wl_entry *p; ++ int i; ++ ++ for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i) ++ list_for_each_entry(p, &ubi->pq[i], u.list) ++ if (p == e) ++ return 0; ++ ++ ubi_err("paranoid check failed for PEB %d, EC %d, Protect queue", ++ e->pnum, e->ec); ++ ubi_dbg_dump_stack(); ++ return 1; ++} + #endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */ +diff -Nurd linux-2.6.24/fs/Kconfig ubifs-v2.6.24/fs/Kconfig +--- linux-2.6.24/fs/Kconfig 2008-01-25 00:58:37.000000000 +0200 ++++ ubifs-v2.6.24/fs/Kconfig 2009-04-07 17:14:47.000000000 +0200 +@@ -1385,6 +1385,9 @@ + + endchoice + ++# UBIFS File system configuration ++source "fs/ubifs/Kconfig" ++ + config CRAMFS + tristate "Compressed ROM file system support (cramfs)" + depends on BLOCK +diff -Nurd linux-2.6.24/fs/Makefile ubifs-v2.6.24/fs/Makefile +--- linux-2.6.24/fs/Makefile 2008-01-25 00:58:37.000000000 +0200 ++++ ubifs-v2.6.24/fs/Makefile 2009-04-07 
17:14:47.000000000 +0200 +@@ -99,6 +99,7 @@ + obj-$(CONFIG_UFS_FS) += ufs/ + obj-$(CONFIG_EFS_FS) += efs/ + obj-$(CONFIG_JFFS2_FS) += jffs2/ ++obj-$(CONFIG_UBIFS_FS) += ubifs/ + obj-$(CONFIG_AFFS_FS) += affs/ + obj-$(CONFIG_ROMFS_FS) += romfs/ + obj-$(CONFIG_QNX4FS_FS) += qnx4/ +diff -Nurd linux-2.6.24/fs/fs-writeback.c ubifs-v2.6.24/fs/fs-writeback.c +--- linux-2.6.24/fs/fs-writeback.c 2008-01-25 00:58:37.000000000 +0200 ++++ ubifs-v2.6.24/fs/fs-writeback.c 2009-04-07 17:14:47.000000000 +0200 +@@ -386,8 +386,6 @@ + * WB_SYNC_HOLD is a hack for sys_sync(): reattach the inode to sb->s_dirty so + * that it can be located for waiting on in __writeback_single_inode(). + * +- * Called under inode_lock. +- * + * If `bdi' is non-zero then we're being asked to writeback a specific queue. + * This function assumes that the blockdev superblock's inodes are backed by + * a variety of queues, so all inodes are searched. For other superblocks, +@@ -403,11 +401,12 @@ + * on the writer throttling path, and we get decent balancing between many + * throttled threads: we don't want them all piling up on inode_sync_wait. + */ +-static void +-sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc) ++void generic_sync_sb_inodes(struct super_block *sb, ++ struct writeback_control *wbc) + { + const unsigned long start = jiffies; /* livelock avoidance */ + ++ spin_lock(&inode_lock); + if (!wbc->for_kupdate || list_empty(&sb->s_io)) + queue_io(sb, wbc->older_than_this); + +@@ -482,8 +481,16 @@ + if (wbc->nr_to_write <= 0) + break; + } ++ spin_unlock(&inode_lock); + return; /* Leave any unwritten inodes on s_io */ + } ++EXPORT_SYMBOL_GPL(generic_sync_sb_inodes); ++ ++static void sync_sb_inodes(struct super_block *sb, ++ struct writeback_control *wbc) ++{ ++ generic_sync_sb_inodes(sb, wbc); ++} + + /* + * Start writeback of dirty pagecache data against all unlocked inodes. +@@ -524,11 +531,8 @@ + * be unmounted by the time it is released. 
+ */ + if (down_read_trylock(&sb->s_umount)) { +- if (sb->s_root) { +- spin_lock(&inode_lock); ++ if (sb->s_root) + sync_sb_inodes(sb, wbc); +- spin_unlock(&inode_lock); +- } + up_read(&sb->s_umount); + } + spin_lock(&sb_lock); +@@ -566,9 +570,7 @@ + (inodes_stat.nr_inodes - inodes_stat.nr_unused) + + nr_dirty + nr_unstable; + wbc.nr_to_write += wbc.nr_to_write / 2; /* Bit more for luck */ +- spin_lock(&inode_lock); + sync_sb_inodes(sb, &wbc); +- spin_unlock(&inode_lock); + } + + /* +diff -Nurd linux-2.6.24/fs/ubifs/Kconfig ubifs-v2.6.24/fs/ubifs/Kconfig +--- linux-2.6.24/fs/ubifs/Kconfig 1970-01-01 02:00:00.000000000 +0200 ++++ ubifs-v2.6.24/fs/ubifs/Kconfig 2009-04-07 17:14:47.000000000 +0200 +@@ -0,0 +1,72 @@ ++config UBIFS_FS ++ tristate "UBIFS file system support" ++ select CRC16 ++ select CRC32 ++ select CRYPTO if UBIFS_FS_ADVANCED_COMPR ++ select CRYPTO if UBIFS_FS_LZO ++ select CRYPTO if UBIFS_FS_ZLIB ++ select CRYPTO_LZO if UBIFS_FS_LZO ++ select CRYPTO_DEFLATE if UBIFS_FS_ZLIB ++ depends on MTD_UBI ++ help ++ UBIFS is a file system for flash devices which works on top of UBI. ++ ++config UBIFS_FS_XATTR ++ bool "Extended attributes support" ++ depends on UBIFS_FS ++ help ++ This option enables support of extended attributes. ++ ++config UBIFS_FS_ADVANCED_COMPR ++ bool "Advanced compression options" ++ depends on UBIFS_FS ++ help ++ This option allows to explicitly choose which compressions, if any, ++ are enabled in UBIFS. Removing compressors means inability to read ++ existing file systems. ++ ++ If unsure, say 'N'. ++ ++config UBIFS_FS_LZO ++ bool "LZO compression support" if UBIFS_FS_ADVANCED_COMPR ++ depends on UBIFS_FS ++ default y ++ help ++ LZO compressor is generally faster than zlib but compresses worse. ++ Say 'Y' if unsure. ++ ++config UBIFS_FS_ZLIB ++ bool "ZLIB compression support" if UBIFS_FS_ADVANCED_COMPR ++ depends on UBIFS_FS ++ default y ++ help ++ Zlib compresses better than LZO but it is slower. Say 'Y' if unsure.
++ ++# Debugging-related stuff ++config UBIFS_FS_DEBUG ++ bool "Enable debugging" ++ depends on UBIFS_FS ++ select DEBUG_FS ++ select KALLSYMS_ALL ++ help ++ This option enables UBIFS debugging. ++ ++config UBIFS_FS_DEBUG_MSG_LVL ++ int "Default message level (0 = no extra messages, 3 = lots)" ++ depends on UBIFS_FS_DEBUG ++ default "0" ++ help ++ This controls the amount of debugging messages produced by UBIFS. ++ If reporting bugs, please try to have available a full dump of the ++ messages at level 1 while the misbehaviour was occurring. Level 2 ++ may become necessary if level 1 messages were not enough to find the ++ bug. Generally Level 3 should be avoided. ++ ++config UBIFS_FS_DEBUG_CHKS ++ bool "Enable extra checks" ++ depends on UBIFS_FS_DEBUG ++ help ++ If extra checks are enabled UBIFS will check the consistency of its ++ internal data structures during operation. However, UBIFS performance ++ is dramatically slower when this option is selected especially if the ++ file system is large. +diff -Nurd linux-2.6.24/fs/ubifs/Makefile ubifs-v2.6.24/fs/ubifs/Makefile +--- linux-2.6.24/fs/ubifs/Makefile 1970-01-01 02:00:00.000000000 +0200 ++++ ubifs-v2.6.24/fs/ubifs/Makefile 2009-04-07 17:14:47.000000000 +0200 +@@ -0,0 +1,9 @@ ++obj-$(CONFIG_UBIFS_FS) += ubifs.o ++ ++ubifs-y += shrinker.o journal.o file.o dir.o super.o sb.o io.o ++ubifs-y += tnc.o master.o scan.o replay.o log.o commit.o gc.o orphan.o ++ubifs-y += budget.o find.o tnc_commit.o compress.o lpt.o lprops.o ++ubifs-y += recovery.o ioctl.o lpt_commit.o tnc_misc.o ++ ++ubifs-$(CONFIG_UBIFS_FS_DEBUG) += debug.o ++ubifs-$(CONFIG_UBIFS_FS_XATTR) += xattr.o +diff -Nurd linux-2.6.24/fs/ubifs/budget.c ubifs-v2.6.24/fs/ubifs/budget.c +--- linux-2.6.24/fs/ubifs/budget.c 1970-01-01 02:00:00.000000000 +0200 ++++ ubifs-v2.6.24/fs/ubifs/budget.c 2009-04-07 17:14:47.000000000 +0200 +@@ -0,0 +1,755 @@ ++/* ++ * This file is part of UBIFS. ++ * ++ * Copyright (C) 2006-2008 Nokia Corporation. 
++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published by ++ * the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, write to the Free Software Foundation, Inc., 51 ++ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ * ++ * Authors: Adrian Hunter ++ * Artem Bityutskiy (Битюцкий Артём) ++ */ ++ ++/* ++ * This file implements the budgeting sub-system which is responsible for UBIFS ++ * space management. ++ * ++ * Factors such as compression, wasted space at the ends of LEBs, space in other ++ * journal heads, the effect of updates on the index, and so on, make it ++ * impossible to accurately predict the amount of space needed. Consequently ++ * approximations are used. ++ */ ++ ++#include "ubifs.h" ++#include <linux/writeback.h> ++ ++/* ++ * When pessimistic budget calculations say that there is not enough space, ++ * UBIFS starts writing back dirty inodes and pages, doing garbage collection, ++ * or committing. The below constant defines maximum number of times UBIFS ++ * repeats the operations. ++ */ ++#define MAX_MKSPC_RETRIES 3 ++ ++/* ++ * The below constant defines amount of dirty pages which should be written ++ * back when trying to shrink the liability. ++ */ ++#define NR_TO_WRITE 16 ++ ++/** ++ * shrink_liability - write-back some dirty pages/inodes. ++ * @c: UBIFS file-system description object ++ * @nr_to_write: how many dirty pages to write-back ++ * ++ * This function shrinks UBIFS liability by means of writing back some amount ++ * of dirty inodes and their pages.
Returns the amount of pages which were ++ * written back. The returned value does not include dirty inodes which were ++ * synchronized. ++ * ++ * Note, this function synchronizes even VFS inodes which are locked ++ * (@i_mutex) by the caller of the budgeting function, because write-back does ++ * not touch @i_mutex. ++ */ ++static int shrink_liability(struct ubifs_info *c, int nr_to_write) ++{ ++ int nr_written; ++ struct writeback_control wbc = { ++ .sync_mode = WB_SYNC_NONE, ++ .range_end = LLONG_MAX, ++ .nr_to_write = nr_to_write, ++ }; ++ ++ generic_sync_sb_inodes(c->vfs_sb, &wbc); ++ nr_written = nr_to_write - wbc.nr_to_write; ++ ++ if (!nr_written) { ++ /* ++ * Re-try again but wait on pages/inodes which are being ++ * written-back concurrently (e.g., by pdflush). ++ */ ++ memset(&wbc, 0, sizeof(struct writeback_control)); ++ wbc.sync_mode = WB_SYNC_ALL; ++ wbc.range_end = LLONG_MAX; ++ wbc.nr_to_write = nr_to_write; ++ generic_sync_sb_inodes(c->vfs_sb, &wbc); ++ nr_written = nr_to_write - wbc.nr_to_write; ++ } ++ ++ dbg_budg("%d pages were written back", nr_written); ++ return nr_written; ++} ++ ++ ++/** ++ * run_gc - run garbage collector. ++ * @c: UBIFS file-system description object ++ * ++ * This function runs garbage collector to make some more free space. Returns ++ * zero if a free LEB has been produced, %-EAGAIN if commit is required, and a ++ * negative error code in case of failure. ++ */ ++static int run_gc(struct ubifs_info *c) ++{ ++ int err, lnum; ++ ++ /* Make some free space by garbage-collecting dirty space */ ++ down_read(&c->commit_sem); ++ lnum = ubifs_garbage_collect(c, 1); ++ up_read(&c->commit_sem); ++ if (lnum < 0) ++ return lnum; ++ ++ /* GC freed one LEB, return it to lprops */ ++ dbg_budg("GC freed LEB %d", lnum); ++ err = ubifs_return_leb(c, lnum); ++ if (err) ++ return err; ++ return 0; ++} ++ ++/** ++ * get_liability - calculate current liability. 
++ * @c: UBIFS file-system description object ++ * ++ * This function calculates and returns current UBIFS liability, i.e. the ++ * amount of bytes UBIFS has "promised" to write to the media. ++ */ ++static long long get_liability(struct ubifs_info *c) ++{ ++ long long liab; ++ ++ spin_lock(&c->space_lock); ++ liab = c->budg_idx_growth + c->budg_data_growth + c->budg_dd_growth; ++ spin_unlock(&c->space_lock); ++ return liab; ++} ++ ++/** ++ * make_free_space - make more free space on the file-system. ++ * @c: UBIFS file-system description object ++ * ++ * This function is called when an operation cannot be budgeted because there ++ * is supposedly no free space. But in most cases there is some free space: ++ * o budgeting is pessimistic, so it always budgets more than is actually ++ * needed, so shrinking the liability is one way to make free space - the ++ * cached data will take less space than it was budgeted for; ++ * o GC may turn some dark space into free space (budgeting treats dark space ++ * as not available); ++ * o commit may free some LEB, i.e., turn freeable LEBs into free LEBs. ++ * ++ * So this function tries to do the above. Returns %-EAGAIN if some free space ++ * was presumably made and the caller has to re-try budgeting the operation. ++ * Returns %-ENOSPC if it couldn't free more space, and other negative error ++ * codes on failures. ++ */ ++static int make_free_space(struct ubifs_info *c) ++{ ++ int err, retries = 0; ++ long long liab1, liab2; ++ ++ do { ++ liab1 = get_liability(c); ++ /* ++ * We probably have some dirty pages or inodes (liability), try
++ */ ++ dbg_budg("liability %lld, run write-back", liab1); ++ shrink_liability(c, NR_TO_WRITE); ++ ++ liab2 = get_liability(c); ++ if (liab2 < liab1) ++ return -EAGAIN; ++ ++ dbg_budg("new liability %lld (not shrinked)", liab2); ++ ++ /* Liability did not shrink again, try GC */ ++ dbg_budg("Run GC"); ++ err = run_gc(c); ++ if (!err) ++ return -EAGAIN; ++ ++ if (err != -EAGAIN && err != -ENOSPC) ++ /* Some real error happened */ ++ return err; ++ ++ dbg_budg("Run commit (retries %d)", retries); ++ err = ubifs_run_commit(c); ++ if (err) ++ return err; ++ } while (retries++ < MAX_MKSPC_RETRIES); ++ ++ return -ENOSPC; ++} ++ ++/** ++ * ubifs_calc_min_idx_lebs - calculate amount of LEBs for the index. ++ * @c: UBIFS file-system description object ++ * ++ * This function calculates and returns the number of LEBs which should be kept ++ * for index usage. ++ */ ++int ubifs_calc_min_idx_lebs(struct ubifs_info *c) ++{ ++ int idx_lebs; ++ long long idx_size; ++ ++ idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx; ++ /* And make sure we have thrice the index size of space reserved */ ++ idx_size += idx_size << 1; ++ /* ++ * We do not maintain 'old_idx_size' as 'old_idx_lebs'/'old_idx_bytes' ++ * pair, nor similarly the two variables for the new index size, so we ++ * have to do this costly 64-bit division on fast-path. ++ */ ++ idx_lebs = div_u64(idx_size + c->idx_leb_size - 1, c->idx_leb_size); ++ /* ++ * The index head is not available for the in-the-gaps method, so add an ++ * extra LEB to compensate. ++ */ ++ idx_lebs += 1; ++ if (idx_lebs < MIN_INDEX_LEBS) ++ idx_lebs = MIN_INDEX_LEBS; ++ return idx_lebs; ++} ++ ++/** ++ * ubifs_calc_available - calculate available FS space. ++ * @c: UBIFS file-system description object ++ * @min_idx_lebs: minimum number of LEBs reserved for the index ++ * ++ * This function calculates and returns amount of FS space available for use. 
++ */ ++long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs) ++{ ++ int subtract_lebs; ++ long long available; ++ ++ available = c->main_bytes - c->lst.total_used; ++ ++ /* ++ * Now 'available' contains theoretically available flash space ++ * assuming there is no index, so we have to subtract the space which ++ * is reserved for the index. ++ */ ++ subtract_lebs = min_idx_lebs; ++ ++ /* Take into account that GC reserves one LEB for its own needs */ ++ subtract_lebs += 1; ++ ++ /* ++ * The GC journal head LEB is not really accessible. And since ++ * different write types go to different heads, we may count only on ++ * one head's space. ++ */ ++ subtract_lebs += c->jhead_cnt - 1; ++ ++ /* We also reserve one LEB for deletions, which bypass budgeting */ ++ subtract_lebs += 1; ++ ++ available -= (long long)subtract_lebs * c->leb_size; ++ ++ /* Subtract the dead space which is not available for use */ ++ available -= c->lst.total_dead; ++ ++ /* ++ * Subtract dark space, which might or might not be usable - it depends ++ * on the data which we have on the media and which will be written. If ++ * this is a lot of uncompressed or not-compressible data, the dark ++ * space cannot be used. ++ */ ++ available -= c->lst.total_dark; ++ ++ /* ++ * However, there is more dark space. The index may be bigger than ++ * @min_idx_lebs. Those extra LEBs are assumed to be available, but ++ * their dark space is not included in total_dark, so it is subtracted ++ * here. ++ */ ++ if (c->lst.idx_lebs > min_idx_lebs) { ++ subtract_lebs = c->lst.idx_lebs - min_idx_lebs; ++ available -= subtract_lebs * c->dark_wm; ++ } ++ ++ /* The calculations are rough and may end up with a negative number */ ++ return available > 0 ? available : 0; ++} ++ ++/** ++ * can_use_rp - check whether the user is allowed to use reserved pool. 
++ * @c: UBIFS file-system description object ++ * ++ * UBIFS has so-called "reserved pool" which is flash space reserved ++ * for the superuser and for users whose UID/GID is recorded in the superblock. ++ * This function checks whether current user is allowed to use reserved pool. ++ * Returns %1 if current user is allowed to use reserved pool and %0 otherwise. ++ */ ++static int can_use_rp(struct ubifs_info *c) ++{ ++ if (current->fsuid == c->rp_uid || capable(CAP_SYS_RESOURCE) || ++ (c->rp_gid != 0 && in_group_p(c->rp_gid))) ++ return 1; ++ return 0; ++} ++ ++/** ++ * do_budget_space - reserve flash space for index and data growth. ++ * @c: UBIFS file-system description object ++ * ++ * This function makes sure UBIFS has enough free LEBs for index growth and ++ * data. ++ * ++ * When budgeting index space, UBIFS reserves thrice as many LEBs as the index ++ * would take if it was consolidated and written to the flash. This guarantees ++ * that the "in-the-gaps" commit method always succeeds and UBIFS will always ++ * be able to commit dirty index. So this function basically adds amount of ++ * budgeted index space to the size of the current index, multiplies this by 3, ++ * and makes sure this does not exceed the amount of free LEBs. ++ * ++ * Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables: ++ * o @c->lst.idx_lebs is the number of LEBs the index currently uses. It might ++ * be large, because UBIFS does not do any index consolidation as long as ++ * there is free space. IOW, the index may take a lot of LEBs, but the LEBs ++ * will contain a lot of dirt. ++ * o @c->min_idx_lebs is the number of LEBS the index presumably takes. IOW, ++ * the index may be consolidated to take up to @c->min_idx_lebs LEBs. ++ * ++ * This function returns zero in case of success, and %-ENOSPC in case of ++ * failure.
++ */ ++static int do_budget_space(struct ubifs_info *c) ++{ ++ long long outstanding, available; ++ int lebs, rsvd_idx_lebs, min_idx_lebs; ++ ++ /* First budget index space */ ++ min_idx_lebs = ubifs_calc_min_idx_lebs(c); ++ ++ /* Now 'min_idx_lebs' contains number of LEBs to reserve */ ++ if (min_idx_lebs > c->lst.idx_lebs) ++ rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs; ++ else ++ rsvd_idx_lebs = 0; ++ ++ /* ++ * The number of LEBs that are available to be used by the index is: ++ * ++ * @c->lst.empty_lebs + @c->freeable_cnt + @c->idx_gc_cnt - ++ * @c->lst.taken_empty_lebs ++ * ++ * @c->lst.empty_lebs are available because they are empty. ++ * @c->freeable_cnt are available because they contain only free and ++ * dirty space, @c->idx_gc_cnt are available because they are index ++ * LEBs that have been garbage collected and are awaiting the commit ++ * before they can be used. And the in-the-gaps method will grab these ++ * if it needs them. @c->lst.taken_empty_lebs are empty LEBs that have ++ * already been allocated for some purpose. ++ * ++ * Note, @c->idx_gc_cnt is included to both @c->lst.empty_lebs (because ++ * these LEBs are empty) and to @c->lst.taken_empty_lebs (because they ++ * are taken until after the commit). ++ * ++ * Note, @c->lst.taken_empty_lebs may temporarily be higher by one ++ * because of the way we serialize LEB allocations and budgeting. See a ++ * comment in 'ubifs_find_free_space()'. 
++ */ ++ lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - ++ c->lst.taken_empty_lebs; ++ if (unlikely(rsvd_idx_lebs > lebs)) { ++ dbg_budg("out of indexing space: min_idx_lebs %d (old %d), " ++ "rsvd_idx_lebs %d", min_idx_lebs, c->min_idx_lebs, ++ rsvd_idx_lebs); ++ return -ENOSPC; ++ } ++ ++ available = ubifs_calc_available(c, min_idx_lebs); ++ outstanding = c->budg_data_growth + c->budg_dd_growth; ++ ++ if (unlikely(available < outstanding)) { ++ dbg_budg("out of data space: available %lld, outstanding %lld", ++ available, outstanding); ++ return -ENOSPC; ++ } ++ ++ if (available - outstanding <= c->rp_size && !can_use_rp(c)) ++ return -ENOSPC; ++ ++ c->min_idx_lebs = min_idx_lebs; ++ return 0; ++} ++ ++/** ++ * calc_idx_growth - calculate approximate index growth from budgeting request. ++ * @c: UBIFS file-system description object ++ * @req: budgeting request ++ * ++ * For now we assume each new node adds one znode. But this is rather poor ++ * approximation, though. ++ */ ++static int calc_idx_growth(const struct ubifs_info *c, ++ const struct ubifs_budget_req *req) ++{ ++ int znodes; ++ ++ znodes = req->new_ino + (req->new_page << UBIFS_BLOCKS_PER_PAGE_SHIFT) + ++ req->new_dent; ++ return znodes * c->max_idx_node_sz; ++} ++ ++/** ++ * calc_data_growth - calculate approximate amount of new data from budgeting ++ * request. ++ * @c: UBIFS file-system description object ++ * @req: budgeting request ++ */ ++static int calc_data_growth(const struct ubifs_info *c, ++ const struct ubifs_budget_req *req) ++{ ++ int data_growth; ++ ++ data_growth = req->new_ino ? c->inode_budget : 0; ++ if (req->new_page) ++ data_growth += c->page_budget; ++ if (req->new_dent) ++ data_growth += c->dent_budget; ++ data_growth += req->new_ino_d; ++ return data_growth; ++} ++ ++/** ++ * calc_dd_growth - calculate approximate amount of data which makes other data ++ * dirty from budgeting request. 
++ * @c: UBIFS file-system description object ++ * @req: budgeting request; each dirtied inode is charged one inode budget ++ */ ++static int calc_dd_growth(const struct ubifs_info *c, ++ const struct ubifs_budget_req *req) ++{ ++ int dd_growth; ++ ++ dd_growth = req->dirtied_page ? c->page_budget : 0; ++ ++ if (req->dirtied_ino) ++ dd_growth += c->inode_budget * req->dirtied_ino; ++ if (req->mod_dent) ++ dd_growth += c->dent_budget; ++ dd_growth += req->dirtied_ino_d; ++ return dd_growth; ++} ++ ++/** ++ * ubifs_budget_space - ensure there is enough space to complete an operation. ++ * @c: UBIFS file-system description object ++ * @req: budget request ++ * ++ * This function allocates budget for an operation. It uses pessimistic ++ * approximation of how much flash space the operation needs. The goal of this ++ * function is to make sure UBIFS always has flash space to flush all dirty ++ * pages, dirty inodes, and dirty znodes (liability). This function may force ++ * commit, garbage-collection or write-back. Returns zero in case of success, ++ * %-ENOSPC if there is no free space and other negative error codes in case of ++ * failures.
++ */ ++int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req) ++{ ++ int uninitialized_var(cmt_retries), uninitialized_var(wb_retries); ++ int err, idx_growth, data_growth, dd_growth, retried = 0; ++ ++ ubifs_assert(req->new_page <= 1); ++ ubifs_assert(req->dirtied_page <= 1); ++ ubifs_assert(req->new_dent <= 1); ++ ubifs_assert(req->mod_dent <= 1); ++ ubifs_assert(req->new_ino <= 1); ++ ubifs_assert(req->new_ino_d <= UBIFS_MAX_INO_DATA); ++ ubifs_assert(req->dirtied_ino <= 4); ++ ubifs_assert(req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4); ++ ubifs_assert(!(req->new_ino_d & 7)); ++ ubifs_assert(!(req->dirtied_ino_d & 7)); ++ ++ data_growth = calc_data_growth(c, req); ++ dd_growth = calc_dd_growth(c, req); ++ if (!data_growth && !dd_growth) ++ return 0; ++ idx_growth = calc_idx_growth(c, req); ++ ++again: ++ spin_lock(&c->space_lock); ++ ubifs_assert(c->budg_idx_growth >= 0); ++ ubifs_assert(c->budg_data_growth >= 0); ++ ubifs_assert(c->budg_dd_growth >= 0); ++ ++ if (unlikely(c->nospace) && (c->nospace_rp || !can_use_rp(c))) { ++ dbg_budg("no space"); ++ spin_unlock(&c->space_lock); ++ return -ENOSPC; ++ } ++ ++ c->budg_idx_growth += idx_growth; ++ c->budg_data_growth += data_growth; ++ c->budg_dd_growth += dd_growth; ++ ++ err = do_budget_space(c); ++ if (likely(!err)) { ++ req->idx_growth = idx_growth; ++ req->data_growth = data_growth; ++ req->dd_growth = dd_growth; ++ spin_unlock(&c->space_lock); ++ return 0; ++ } ++ ++ /* Restore the old values */ ++ c->budg_idx_growth -= idx_growth; ++ c->budg_data_growth -= data_growth; ++ c->budg_dd_growth -= dd_growth; ++ spin_unlock(&c->space_lock); ++ ++ if (req->fast) { ++ dbg_budg("no space for fast budgeting"); ++ return err; ++ } ++ ++ err = make_free_space(c); ++ cond_resched(); ++ if (err == -EAGAIN) { ++ dbg_budg("try again"); ++ goto again; ++ } else if (err == -ENOSPC) { ++ if (!retried) { ++ retried = 1; ++ dbg_budg("-ENOSPC, but anyway try once again"); ++ goto again; ++ } ++ dbg_budg("FS 
is full, -ENOSPC"); ++ c->nospace = 1; ++ if (can_use_rp(c) || c->rp_size == 0) ++ c->nospace_rp = 1; ++ smp_wmb(); ++ } else ++ ubifs_err("cannot budget space, error %d", err); ++ return err; ++} ++ ++/** ++ * ubifs_release_budget - release budgeted free space. ++ * @c: UBIFS file-system description object ++ * @req: budget request ++ * ++ * This function releases the space budgeted by 'ubifs_budget_space()'. Note, ++ * since the index changes (which were budgeted for in @req->idx_growth) will ++ * only be written to the media on commit, this function moves the index budget ++ * from @c->budg_idx_growth to @c->budg_uncommitted_idx. The latter will be ++ * zeroed by the commit operation. ++ */ ++void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req) ++{ ++ ubifs_assert(req->new_page <= 1); ++ ubifs_assert(req->dirtied_page <= 1); ++ ubifs_assert(req->new_dent <= 1); ++ ubifs_assert(req->mod_dent <= 1); ++ ubifs_assert(req->new_ino <= 1); ++ ubifs_assert(req->new_ino_d <= UBIFS_MAX_INO_DATA); ++ ubifs_assert(req->dirtied_ino <= 4); ++ ubifs_assert(req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4); ++ ubifs_assert(!(req->new_ino_d & 7)); ++ ubifs_assert(!(req->dirtied_ino_d & 7)); ++ if (!req->recalculate) { ++ ubifs_assert(req->idx_growth >= 0); ++ ubifs_assert(req->data_growth >= 0); ++ ubifs_assert(req->dd_growth >= 0); ++ } ++ ++ if (req->recalculate) { ++ req->data_growth = calc_data_growth(c, req); ++ req->dd_growth = calc_dd_growth(c, req); ++ req->idx_growth = calc_idx_growth(c, req); ++ } ++ ++ if (!req->data_growth && !req->dd_growth) ++ return; ++ ++ c->nospace = c->nospace_rp = 0; ++ smp_wmb(); ++ ++ spin_lock(&c->space_lock); ++ c->budg_idx_growth -= req->idx_growth; ++ c->budg_uncommitted_idx += req->idx_growth; ++ c->budg_data_growth -= req->data_growth; ++ c->budg_dd_growth -= req->dd_growth; ++ c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); ++ ++ ubifs_assert(c->budg_idx_growth >= 0); ++ ubifs_assert(c->budg_data_growth >= 0); ++ 
ubifs_assert(c->budg_dd_growth >= 0); ++ ubifs_assert(c->min_idx_lebs < c->main_lebs); ++ ubifs_assert(!(c->budg_idx_growth & 7)); ++ ubifs_assert(!(c->budg_data_growth & 7)); ++ ubifs_assert(!(c->budg_dd_growth & 7)); ++ spin_unlock(&c->space_lock); ++} ++ ++/** ++ * ubifs_convert_page_budget - convert budget of a new page. ++ * @c: UBIFS file-system description object ++ * ++ * This function converts budget which was allocated for a new page of data to ++ * the budget of changing an existing page of data. The latter is smaller than ++ * the former, so this function only does simple re-calculation and does not ++ * involve any write-back. ++ */ ++void ubifs_convert_page_budget(struct ubifs_info *c) ++{ ++ spin_lock(&c->space_lock); ++ /* Release the index growth reservation */ ++ c->budg_idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT; ++ /* Release the data growth reservation */ ++ c->budg_data_growth -= c->page_budget; ++ /* Increase the dirty data growth reservation instead */ ++ c->budg_dd_growth += c->page_budget; ++ /* And re-calculate the indexing space reservation */ ++ c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); ++ spin_unlock(&c->space_lock); ++} ++ ++/** ++ * ubifs_release_dirty_inode_budget - release dirty inode budget. ++ * @c: UBIFS file-system description object ++ * @ui: UBIFS inode to release the budget for ++ * ++ * This function releases budget corresponding to a dirty inode. It is usually ++ * called after the inode has been written to the media and marked as ++ * clean. ++ */ ++void ubifs_release_dirty_inode_budget(struct ubifs_info *c, ++ struct ubifs_inode *ui) ++{ ++ struct ubifs_budget_req req; ++ ++ memset(&req, 0, sizeof(struct ubifs_budget_req)); ++ req.dd_growth = c->inode_budget + ALIGN(ui->data_len, 8); ++ ubifs_release_budget(c, &req); ++} ++ ++/** ++ * ubifs_reported_space - calculate reported free space.
++ * @c: the UBIFS file-system description object ++ * @free: amount of free space ++ * ++ * This function calculates amount of free space which will be reported to ++ * user-space. User-space applications tend to expect that if the file-system ++ * (e.g., via the 'statfs()' call) reports that it has N bytes available, they ++ * are able to write a file of size N. UBIFS attaches node headers to each data ++ * node and it has to write indexing nodes as well. This introduces additional ++ * overhead, and UBIFS has to report slightly less free space to meet the above ++ * expectations. ++ * ++ * This function assumes free space is made up of uncompressed data nodes and ++ * full index nodes (one per data node, tripled because we always allow enough ++ * space to write the index thrice). ++ * ++ * Note, the calculation is pessimistic, which means that most of the time ++ * UBIFS reports less space than it actually has. ++ */ ++long long ubifs_reported_space(const struct ubifs_info *c, long long free) ++{ ++ int divisor, factor, f; ++ ++ /* ++ * Reported space size is @free * X, where X is UBIFS block size ++ * divided by UBIFS block size + all overhead one data block ++ * introduces. The overhead is the node header + indexing overhead. ++ * ++ * Indexing overhead calculations are based on the following formula: ++ * I = N/(f - 1) + 1, where I - number of indexing nodes, N - number ++ * of data nodes, f - fanout. Because effective UBIFS fanout is half ++ * of the maximum fanout, we assume that each data node ++ * introduces 3 * @c->max_idx_node_sz / (@c->fanout/2 - 1) bytes. ++ * Note, the multiplier 3 is because UBIFS reserves thrice as much space ++ * for the index. ++ */ ++ f = c->fanout > 3 ? c->fanout >> 1 : 2; ++ factor = UBIFS_BLOCK_SIZE; ++ divisor = UBIFS_MAX_DATA_NODE_SZ; ++ divisor += (c->max_idx_node_sz * 3) / (f - 1); ++ free *= factor; ++ return div_u64(free, divisor); ++} ++ ++/** ++ * ubifs_get_free_space_nolock - return amount of free space.
++ * @c: UBIFS file-system description object ++ * ++ * This function calculates amount of free space to report to user-space. ++ * ++ * Because UBIFS may introduce substantial overhead (the index, node headers, ++ * alignment, wastage at the end of LEBs, etc), it cannot report real amount of ++ * free flash space it has (well, because not all dirty space is reclaimable, ++ * UBIFS does not actually know the real amount). If UBIFS did so, it would ++ * break user expectations about what free space is. Users tend ++ * to assume that if the file-system reports N bytes of free space, they would ++ * be able to fit a file of N bytes to the FS. This almost works for ++ * traditional file-systems, because they have way less overhead than UBIFS. ++ * So, to keep users happy, UBIFS tries to take the overhead into account. ++ */ ++long long ubifs_get_free_space_nolock(struct ubifs_info *c) ++{ ++ int rsvd_idx_lebs, lebs; ++ long long available, outstanding, free; ++ ++ ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c)); ++ outstanding = c->budg_data_growth + c->budg_dd_growth; ++ available = ubifs_calc_available(c, c->min_idx_lebs); ++ ++ /* ++ * When reporting free space to user-space, UBIFS guarantees that it is ++ * possible to write a file of free space size. This means that for ++ * empty LEBs we may use more precise calculations than ++ * 'ubifs_calc_available()' is using. Namely, we know that in empty ++ * LEBs we would waste only @c->leb_overhead bytes, not @c->dark_wm. ++ * Thus, amend the available space. ++ * ++ * Note, the calculations below are similar to what we have in ++ * 'do_budget_space()', so refer there for comments.
++ */ ++ if (c->min_idx_lebs > c->lst.idx_lebs) ++ rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; ++ else ++ rsvd_idx_lebs = 0; ++ lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - ++ c->lst.taken_empty_lebs; ++ lebs -= rsvd_idx_lebs; ++ available += lebs * (c->dark_wm - c->leb_overhead); ++ ++ if (available > outstanding) ++ free = ubifs_reported_space(c, available - outstanding); ++ else ++ free = 0; ++ return free; ++} ++ ++/** ++ * ubifs_get_free_space - return amount of free space. ++ * @c: UBIFS file-system description object ++ * ++ * This function calculates and returns amount of free space to report to ++ * user-space. ++ */ ++long long ubifs_get_free_space(struct ubifs_info *c) ++{ ++ long long free; ++ ++ spin_lock(&c->space_lock); ++ free = ubifs_get_free_space_nolock(c); ++ spin_unlock(&c->space_lock); ++ ++ return free; ++} +diff -Nurd linux-2.6.24/fs/ubifs/commit.c ubifs-v2.6.24/fs/ubifs/commit.c +--- linux-2.6.24/fs/ubifs/commit.c 1970-01-01 02:00:00.000000000 +0200 ++++ ubifs-v2.6.24/fs/ubifs/commit.c 2009-04-07 17:14:47.000000000 +0200 +@@ -0,0 +1,679 @@ ++/* ++ * This file is part of UBIFS. ++ * ++ * Copyright (C) 2006-2008 Nokia Corporation. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published by ++ * the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details.
++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, write to the Free Software Foundation, Inc., 51 ++ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ * ++ * Authors: Adrian Hunter ++ * Artem Bityutskiy (Битюцкий Артём) ++ */ ++ ++/* ++ * This file implements functions that manage the running of the commit process. ++ * Each affected module has its own functions to accomplish its part in the ++ * commit and those functions are called here. ++ * ++ * The commit is the process whereby all updates to the index and LEB properties ++ * are written out together and the journal becomes empty. This keeps the ++ * file system consistent - at all times the state can be recreated by reading ++ * the index and LEB properties and then replaying the journal. ++ * ++ * The commit is split into two parts named "commit start" and "commit end". ++ * During commit start, the commit process has exclusive access to the journal ++ * by holding the commit semaphore down for writing. As few I/O operations as ++ * possible are performed during commit start, instead the nodes that are to be ++ * written are merely identified. During commit end, the commit semaphore is no ++ * longer held and the journal is again in operation, allowing users to continue ++ * to use the file system while the bulk of the commit I/O is performed. The ++ * purpose of this two-step approach is to prevent the commit from causing any ++ * latency blips. Note that in any case, the commit does not prevent lookups ++ * (as permitted by the TNC mutex), or access to VFS data structures e.g. page ++ * cache. ++ */ ++ ++#include <linux/freezer.h> ++#include <linux/kthread.h> ++#include "ubifs.h" ++ ++/** ++ * do_commit - commit the journal. ++ * @c: UBIFS file-system description object ++ * ++ * This function implements UBIFS commit. It has to be called with the commit ++ * semaphore (@c->commit_sem) held for writing; it is released here on all paths.
Returns zero in case of success and a negative error code in case of ++ * failure. On failure the commit state becomes %COMMIT_BROKEN and 'ubifs_ro_mode()' is called. ++ */ ++static int do_commit(struct ubifs_info *c) ++{ ++ int err, new_ltail_lnum, old_ltail_lnum, i; ++ struct ubifs_zbranch zroot; ++ struct ubifs_lp_stats lst; ++ ++ dbg_cmt("start"); ++ if (c->ro_media) { ++ err = -EROFS; ++ goto out_up; ++ } ++ ++ /* Sync all write buffers (necessary for recovery) */ ++ for (i = 0; i < c->jhead_cnt; i++) { ++ err = ubifs_wbuf_sync(&c->jheads[i].wbuf); ++ if (err) ++ goto out_up; ++ } ++ ++ c->cmt_no += 1; ++ err = ubifs_gc_start_commit(c); ++ if (err) ++ goto out_up; ++ err = dbg_check_lprops(c); ++ if (err) ++ goto out_up; ++ err = ubifs_log_start_commit(c, &new_ltail_lnum); ++ if (err) ++ goto out_up; ++ err = ubifs_tnc_start_commit(c, &zroot); ++ if (err) ++ goto out_up; ++ err = ubifs_lpt_start_commit(c); ++ if (err) ++ goto out_up; ++ err = ubifs_orphan_start_commit(c); ++ if (err) ++ goto out_up; ++ ++ ubifs_get_lp_stats(c, &lst); ++ ++ up_write(&c->commit_sem); ++ ++ err = ubifs_tnc_end_commit(c); ++ if (err) ++ goto out; ++ err = ubifs_lpt_end_commit(c); ++ if (err) ++ goto out; ++ err = ubifs_orphan_end_commit(c); ++ if (err) ++ goto out; ++ old_ltail_lnum = c->ltail_lnum; ++ err = ubifs_log_end_commit(c, new_ltail_lnum); ++ if (err) ++ goto out; ++ err = dbg_check_old_index(c, &zroot); ++ if (err) ++ goto out; ++ ++ mutex_lock(&c->mst_mutex); ++ c->mst_node->cmt_no = cpu_to_le64(c->cmt_no); ++ c->mst_node->log_lnum = cpu_to_le32(new_ltail_lnum); ++ c->mst_node->root_lnum = cpu_to_le32(zroot.lnum); ++ c->mst_node->root_offs = cpu_to_le32(zroot.offs); ++ c->mst_node->root_len = cpu_to_le32(zroot.len); ++ c->mst_node->ihead_lnum = cpu_to_le32(c->ihead_lnum); ++ c->mst_node->ihead_offs = cpu_to_le32(c->ihead_offs); ++ c->mst_node->index_size = cpu_to_le64(c->old_idx_sz); ++ c->mst_node->lpt_lnum = cpu_to_le32(c->lpt_lnum); ++ c->mst_node->lpt_offs = cpu_to_le32(c->lpt_offs); ++ c->mst_node->nhead_lnum = cpu_to_le32(c->nhead_lnum); ++ 
c->mst_node->nhead_offs = cpu_to_le32(c->nhead_offs); ++ c->mst_node->ltab_lnum = cpu_to_le32(c->ltab_lnum); ++ c->mst_node->ltab_offs = cpu_to_le32(c->ltab_offs); ++ c->mst_node->lsave_lnum = cpu_to_le32(c->lsave_lnum); ++ c->mst_node->lsave_offs = cpu_to_le32(c->lsave_offs); ++ c->mst_node->lscan_lnum = cpu_to_le32(c->lscan_lnum); ++ c->mst_node->empty_lebs = cpu_to_le32(lst.empty_lebs); ++ c->mst_node->idx_lebs = cpu_to_le32(lst.idx_lebs); ++ c->mst_node->total_free = cpu_to_le64(lst.total_free); ++ c->mst_node->total_dirty = cpu_to_le64(lst.total_dirty); ++ c->mst_node->total_used = cpu_to_le64(lst.total_used); ++ c->mst_node->total_dead = cpu_to_le64(lst.total_dead); ++ c->mst_node->total_dark = cpu_to_le64(lst.total_dark); ++ if (c->no_orphs) ++ c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS); ++ else ++ c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_NO_ORPHS); ++ err = ubifs_write_master(c); ++ mutex_unlock(&c->mst_mutex); ++ if (err) ++ goto out; ++ ++ err = ubifs_log_post_commit(c, old_ltail_lnum); ++ if (err) ++ goto out; ++ err = ubifs_gc_end_commit(c); ++ if (err) ++ goto out; ++ err = ubifs_lpt_post_commit(c); ++ if (err) ++ goto out; ++ ++ spin_lock(&c->cs_lock); ++ c->cmt_state = COMMIT_RESTING; ++ wake_up(&c->cmt_wq); ++ dbg_cmt("commit end"); ++ spin_unlock(&c->cs_lock); ++ ++ return 0; ++ ++out_up: ++ up_write(&c->commit_sem); ++out: ++ ubifs_err("commit failed, error %d", err); ++ spin_lock(&c->cs_lock); ++ c->cmt_state = COMMIT_BROKEN; ++ wake_up(&c->cmt_wq); ++ spin_unlock(&c->cs_lock); ++ ubifs_ro_mode(c, err); ++ return err; ++} ++ ++/** ++ * run_bg_commit - run background commit if it is needed. ++ * @c: UBIFS file-system description object ++ * ++ * This function runs background commit if it is needed. Returns zero in case ++ * of success and a negative error code in case of failure. 
++ */ ++static int run_bg_commit(struct ubifs_info *c) ++{ ++ spin_lock(&c->cs_lock); ++ /* ++ * Run background commit only if background commit was requested or if ++ * commit is required. ++ */ ++ if (c->cmt_state != COMMIT_BACKGROUND && ++ c->cmt_state != COMMIT_REQUIRED) ++ goto out; ++ spin_unlock(&c->cs_lock); ++ ++ down_write(&c->commit_sem); ++ spin_lock(&c->cs_lock); ++ if (c->cmt_state == COMMIT_REQUIRED) ++ c->cmt_state = COMMIT_RUNNING_REQUIRED; ++ else if (c->cmt_state == COMMIT_BACKGROUND) ++ c->cmt_state = COMMIT_RUNNING_BACKGROUND; ++ else ++ goto out_cmt_unlock; ++ spin_unlock(&c->cs_lock); ++ ++ return do_commit(c); ++ ++out_cmt_unlock: ++ up_write(&c->commit_sem); ++out: ++ spin_unlock(&c->cs_lock); ++ return 0; ++} ++ ++/** ++ * ubifs_bg_thread - UBIFS background thread function. ++ * @info: points to the file-system description object ++ * ++ * This function implements various file-system background activities: ++ * o when a write-buffer timer expires it synchronizes the appropriate ++ * write-buffer; ++ * o when the journal is about to be full, it starts in-advance commit. ++ * ++ * Note, other stuff like background garbage collection may be added here in ++ * the future. ++ */ ++int ubifs_bg_thread(void *info) ++{ ++ int err; ++ struct ubifs_info *c = info; ++ ++ dbg_msg("background thread \"%s\" started, PID %d", ++ c->bgt_name, current->pid); ++ set_freezable(); ++ ++ while (1) { ++ if (kthread_should_stop()) ++ break; ++ ++ if (try_to_freeze()) ++ continue; ++ ++ set_current_state(TASK_INTERRUPTIBLE); ++ /* Check if there is something to do */ ++ if (!c->need_bgt) { ++ /* ++ * Re-check the stop request: nothing prevents us from going to sleep now, ++ * never being woken up, and so blocking the task which ++ * could then wait in 'kthread_stop()' forever. 
++ */ ++ if (kthread_should_stop()) ++ break; ++ schedule(); ++ continue; ++ } else ++ __set_current_state(TASK_RUNNING); ++ ++ c->need_bgt = 0; ++ err = ubifs_bg_wbufs_sync(c); ++ if (err) ++ ubifs_ro_mode(c, err); ++ ++ run_bg_commit(c); ++ cond_resched(); ++ } ++ ++ dbg_msg("background thread \"%s\" stops", c->bgt_name); ++ return 0; ++} ++ ++/** ++ * ubifs_commit_required - set commit state to "required". ++ * @c: UBIFS file-system description object ++ * ++ * This function is called if a commit is required but cannot be done from the ++ * calling function, so it is just flagged instead. ++ */ ++void ubifs_commit_required(struct ubifs_info *c) ++{ ++ spin_lock(&c->cs_lock); ++ switch (c->cmt_state) { ++ case COMMIT_RESTING: ++ case COMMIT_BACKGROUND: ++ dbg_cmt("old: %s, new: %s", dbg_cstate(c->cmt_state), ++ dbg_cstate(COMMIT_REQUIRED)); ++ c->cmt_state = COMMIT_REQUIRED; ++ break; ++ case COMMIT_RUNNING_BACKGROUND: ++ dbg_cmt("old: %s, new: %s", dbg_cstate(c->cmt_state), ++ dbg_cstate(COMMIT_RUNNING_REQUIRED)); ++ c->cmt_state = COMMIT_RUNNING_REQUIRED; ++ break; ++ case COMMIT_REQUIRED: ++ case COMMIT_RUNNING_REQUIRED: ++ case COMMIT_BROKEN: ++ break; ++ } ++ spin_unlock(&c->cs_lock); ++} ++ ++/** ++ * ubifs_request_bg_commit - notify the background thread to do a commit. ++ * @c: UBIFS file-system description object ++ * ++ * This function is called if the journal is full enough to make a commit ++ * worthwhile, so background thread is kicked to start it. ++ */ ++void ubifs_request_bg_commit(struct ubifs_info *c) ++{ ++ spin_lock(&c->cs_lock); ++ if (c->cmt_state == COMMIT_RESTING) { ++ dbg_cmt("old: %s, new: %s", dbg_cstate(c->cmt_state), ++ dbg_cstate(COMMIT_BACKGROUND)); ++ c->cmt_state = COMMIT_BACKGROUND; ++ spin_unlock(&c->cs_lock); ++ ubifs_wake_up_bgt(c); ++ } else ++ spin_unlock(&c->cs_lock); ++} ++ ++/** ++ * wait_for_commit - wait for commit. 
++ * @c: UBIFS file-system description object ++ * ++ * This function sleeps until the commit operation is no longer running. ++ */ ++static int wait_for_commit(struct ubifs_info *c) ++{ ++ dbg_cmt("pid %d goes sleep", current->pid); ++ ++ /* ++ * The following sleeps if the condition is false, and will be woken ++ * when the commit ends. It is possible, although very unlikely, that we ++ * will wake up and see the subsequent commit running, rather than the ++ * one we were waiting for, and go back to sleep. However, we will be ++ * woken again, so there is no danger of sleeping forever. ++ */ ++ wait_event(c->cmt_wq, c->cmt_state != COMMIT_RUNNING_BACKGROUND && ++ c->cmt_state != COMMIT_RUNNING_REQUIRED); ++ dbg_cmt("commit finished, pid %d woke up", current->pid); ++ return 0; ++} ++ ++/** ++ * ubifs_run_commit - run or wait for commit. ++ * @c: UBIFS file-system description object ++ * ++ * This function runs commit and returns zero in case of success and a negative ++ * error code in case of failure. If a commit is already running, it waits for that commit to finish instead; %-EINVAL is returned if the commit state is %COMMIT_BROKEN. ++ */ ++int ubifs_run_commit(struct ubifs_info *c) ++{ ++ int err = 0; ++ ++ spin_lock(&c->cs_lock); ++ if (c->cmt_state == COMMIT_BROKEN) { ++ err = -EINVAL; ++ goto out; ++ } ++ ++ if (c->cmt_state == COMMIT_RUNNING_BACKGROUND) ++ /* ++ * We set the commit state to 'running required' to indicate ++ * that we want it to complete as quickly as possible. ++ */ ++ c->cmt_state = COMMIT_RUNNING_REQUIRED; ++ ++ if (c->cmt_state == COMMIT_RUNNING_REQUIRED) { ++ spin_unlock(&c->cs_lock); ++ return wait_for_commit(c); ++ } ++ spin_unlock(&c->cs_lock); ++ ++ /* Ok, the commit is indeed needed */ ++ ++ down_write(&c->commit_sem); ++ spin_lock(&c->cs_lock); ++ /* ++ * Since we unlocked 'c->cs_lock', the state may have changed, so ++ * re-check it. 
++ */ ++ if (c->cmt_state == COMMIT_BROKEN) { ++ err = -EINVAL; ++ goto out_cmt_unlock; ++ } ++ ++ if (c->cmt_state == COMMIT_RUNNING_BACKGROUND) ++ c->cmt_state = COMMIT_RUNNING_REQUIRED; ++ ++ if (c->cmt_state == COMMIT_RUNNING_REQUIRED) { ++ up_write(&c->commit_sem); ++ spin_unlock(&c->cs_lock); ++ return wait_for_commit(c); ++ } ++ c->cmt_state = COMMIT_RUNNING_REQUIRED; ++ spin_unlock(&c->cs_lock); ++ ++ err = do_commit(c); ++ return err; ++ ++out_cmt_unlock: ++ up_write(&c->commit_sem); ++out: ++ spin_unlock(&c->cs_lock); ++ return err; ++} ++ ++/** ++ * ubifs_gc_should_commit - determine if it is time for GC to run commit. ++ * @c: UBIFS file-system description object ++ * ++ * This function is called by garbage collection to determine if commit should ++ * be run. If commit state is @COMMIT_BACKGROUND, which means that the journal ++ * is full enough to start commit, this function returns true. It is not ++ * absolutely necessary to commit yet, but it feels like this should be better ++ * than to keep doing GC. This function returns %1 if GC has to initiate commit ++ * and %0 if not. ++ */ ++int ubifs_gc_should_commit(struct ubifs_info *c) ++{ ++ int ret = 0; ++ ++ spin_lock(&c->cs_lock); ++ if (c->cmt_state == COMMIT_BACKGROUND) { ++ dbg_cmt("commit required now"); ++ c->cmt_state = COMMIT_REQUIRED; ++ } else ++ dbg_cmt("commit not requested"); ++ if (c->cmt_state == COMMIT_REQUIRED) ++ ret = 1; ++ spin_unlock(&c->cs_lock); ++ return ret; ++} ++ ++#ifdef CONFIG_UBIFS_FS_DEBUG ++ ++/** ++ * struct idx_node - hold index nodes during index tree traversal. 
++ * @list: list ++ * @iip: index in parent (slot number of this indexing node in the parent ++ * indexing node) ++ * @upper_key: all keys in this indexing node have to be less or equivalent to ++ * this key ++ * @idx: index node (8-byte aligned because all node structures must be 8-byte ++ * aligned) ++ */ ++struct idx_node { ++ struct list_head list; ++ int iip; ++ union ubifs_key upper_key; ++ struct ubifs_idx_node idx __attribute__((aligned(8))); ++}; ++ ++/** ++ * dbg_old_index_check_init - get information for the next old index check. ++ * @c: UBIFS file-system description object ++ * @zroot: root of the index ++ * ++ * This function records information about the index that will be needed for the ++ * next old index check i.e. 'dbg_check_old_index()'. ++ * ++ * This function returns %0 on success and a negative error code on failure. ++ */ ++int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot) ++{ ++ struct ubifs_idx_node *idx; ++ int lnum, offs, len, err = 0; ++ struct ubifs_debug_info *d = c->dbg; ++ ++ d->old_zroot = *zroot; ++ lnum = d->old_zroot.lnum; ++ offs = d->old_zroot.offs; ++ len = d->old_zroot.len; ++ ++ idx = kmalloc(c->max_idx_node_sz, GFP_NOFS); ++ if (!idx) ++ return -ENOMEM; ++ ++ err = ubifs_read_node(c, idx, UBIFS_IDX_NODE, len, lnum, offs); ++ if (err) ++ goto out; ++ ++ d->old_zroot_level = le16_to_cpu(idx->level); ++ d->old_zroot_sqnum = le64_to_cpu(idx->ch.sqnum); ++out: ++ kfree(idx); ++ return err; ++} ++ ++/** ++ * dbg_check_old_index - check the old copy of the index. ++ * @c: UBIFS file-system description object ++ * @zroot: root of the new index ++ * ++ * In order to be able to recover from an unclean unmount, a complete copy of ++ * the index must exist on flash. This is the "old" index. The commit process ++ * must write the "new" index to flash without overwriting or destroying any ++ * part of the old index. 
This function is run at commit end in order to check ++ * that the old index does indeed exist completely intact. If the %UBIFS_CHK_OLD_IDX check flag is not set the traversal is skipped, but @zroot is still recorded for the next check. ++ * ++ * This function returns %0 on success and a negative error code on failure. ++ */ ++int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot) ++{ ++ int lnum, offs, len, err = 0, uninitialized_var(last_level), child_cnt; ++ int first = 1, iip; ++ struct ubifs_debug_info *d = c->dbg; ++ union ubifs_key lower_key, upper_key, l_key, u_key; ++ unsigned long long uninitialized_var(last_sqnum); ++ struct ubifs_idx_node *idx; ++ struct list_head list; ++ struct idx_node *i; ++ size_t sz; ++ ++ /* Must precede any 'goto out': the error path there walks @list */ ++ INIT_LIST_HEAD(&list); ++ if (!(ubifs_chk_flags & UBIFS_CHK_OLD_IDX)) ++ goto out; ++ ++ sz = sizeof(struct idx_node) + ubifs_idx_node_sz(c, c->fanout) - ++ UBIFS_IDX_NODE_SZ; ++ ++ /* Start at the old zroot */ ++ lnum = d->old_zroot.lnum; ++ offs = d->old_zroot.offs; ++ len = d->old_zroot.len; ++ iip = 0; ++ ++ /* ++ * Traverse the index tree preorder depth-first i.e. do a node and then ++ * its subtrees from left to right. 
++ */ ++ while (1) { ++ struct ubifs_branch *br; ++ ++ /* Get the next index node */ ++ i = kmalloc(sz, GFP_NOFS); ++ if (!i) { ++ err = -ENOMEM; ++ goto out_free; ++ } ++ i->iip = iip; ++ /* Keep the index nodes on our path in a linked list */ ++ list_add_tail(&i->list, &list); ++ /* Read the index node */ ++ idx = &i->idx; ++ err = ubifs_read_node(c, idx, UBIFS_IDX_NODE, len, lnum, offs); ++ if (err) ++ goto out_free; ++ /* Validate index node */ ++ child_cnt = le16_to_cpu(idx->child_cnt); ++ if (child_cnt < 1 || child_cnt > c->fanout) { ++ err = 1; ++ goto out_dump; ++ } ++ if (first) { ++ first = 0; ++ /* Check root level and sqnum */ ++ if (le16_to_cpu(idx->level) != d->old_zroot_level) { ++ err = 2; ++ goto out_dump; ++ } ++ if (le64_to_cpu(idx->ch.sqnum) != d->old_zroot_sqnum) { ++ err = 3; ++ goto out_dump; ++ } ++ /* Set last values as though root had a parent */ ++ last_level = le16_to_cpu(idx->level) + 1; ++ last_sqnum = le64_to_cpu(idx->ch.sqnum) + 1; ++ key_read(c, ubifs_idx_key(c, idx), &lower_key); ++ highest_ino_key(c, &upper_key, INUM_WATERMARK); ++ } ++ key_copy(c, &upper_key, &i->upper_key); ++ if (le16_to_cpu(idx->level) != last_level - 1) { ++ err = 3; ++ goto out_dump; ++ } ++ /* ++ * The index is always written bottom up hence a child's sqnum ++ * is always less than the parent's. 
++ */ ++ if (le64_to_cpu(idx->ch.sqnum) >= last_sqnum) { ++ err = 4; ++ goto out_dump; ++ } ++ /* Check key range */ ++ key_read(c, ubifs_idx_key(c, idx), &l_key); ++ br = ubifs_idx_branch(c, idx, child_cnt - 1); ++ key_read(c, &br->key, &u_key); ++ if (keys_cmp(c, &lower_key, &l_key) > 0) { ++ err = 5; ++ goto out_dump; ++ } ++ if (keys_cmp(c, &upper_key, &u_key) < 0) { ++ err = 6; ++ goto out_dump; ++ } ++ if (keys_cmp(c, &upper_key, &u_key) == 0) ++ if (!is_hash_key(c, &u_key)) { ++ err = 7; ++ goto out_dump; ++ } ++ /* Go to next index node */ ++ if (le16_to_cpu(idx->level) == 0) { ++ /* At the bottom, so go up until can go right */ ++ while (1) { ++ /* Drop the bottom of the list */ ++ list_del(&i->list); ++ kfree(i); ++ /* No more list means we are done */ ++ if (list_empty(&list)) ++ goto out; ++ /* Look at the new bottom */ ++ i = list_entry(list.prev, struct idx_node, ++ list); ++ idx = &i->idx; ++ /* Can we go right */ ++ if (iip + 1 < le16_to_cpu(idx->child_cnt)) { ++ iip = iip + 1; ++ break; ++ } else ++ /* Nope, so go up again */ ++ iip = i->iip; ++ } ++ } else ++ /* Go down left */ ++ iip = 0; ++ /* ++ * We have the parent in 'idx' and now we set up for reading the ++ * child pointed to by slot 'iip'. 
++ */ ++ last_level = le16_to_cpu(idx->level); ++ last_sqnum = le64_to_cpu(idx->ch.sqnum); ++ br = ubifs_idx_branch(c, idx, iip); ++ lnum = le32_to_cpu(br->lnum); ++ offs = le32_to_cpu(br->offs); ++ len = le32_to_cpu(br->len); ++ key_read(c, &br->key, &lower_key); ++ if (iip + 1 < le16_to_cpu(idx->child_cnt)) { ++ br = ubifs_idx_branch(c, idx, iip + 1); ++ key_read(c, &br->key, &upper_key); ++ } else ++ key_copy(c, &i->upper_key, &upper_key); ++ } ++out: ++ err = dbg_old_index_check_init(c, zroot); ++ if (err) ++ goto out_free; ++ ++ return 0; ++ ++out_dump: ++ dbg_err("dumping index node (iip=%d)", i->iip); ++ dbg_dump_node(c, idx); ++ list_del(&i->list); ++ kfree(i); ++ if (!list_empty(&list)) { ++ i = list_entry(list.prev, struct idx_node, list); ++ dbg_err("dumping parent index node"); ++ dbg_dump_node(c, &i->idx); ++ } ++out_free: ++ while (!list_empty(&list)) { ++ i = list_entry(list.next, struct idx_node, list); ++ list_del(&i->list); ++ kfree(i); ++ } ++ ubifs_err("failed, error %d", err); ++ if (err > 0) ++ err = -EINVAL; ++ return err; ++} ++ ++#endif /* CONFIG_UBIFS_FS_DEBUG */ +diff -Nurd linux-2.6.24/fs/ubifs/compress.c ubifs-v2.6.24/fs/ubifs/compress.c +--- linux-2.6.24/fs/ubifs/compress.c 1970-01-01 02:00:00.000000000 +0200 ++++ ubifs-v2.6.24/fs/ubifs/compress.c 2009-04-07 17:14:47.000000000 +0200 +@@ -0,0 +1,251 @@ ++/* ++ * This file is part of UBIFS. ++ * ++ * Copyright (C) 2006-2008 Nokia Corporation. ++ * Copyright (C) 2006, 2007 University of Szeged, Hungary ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published by ++ * the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. 
++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, write to the Free Software Foundation, Inc., 51 ++ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ * ++ * Authors: Adrian Hunter ++ * Artem Bityutskiy (Битюцкий Артём) ++ * Zoltan Sogor ++ */ ++ ++/* ++ * This file provides a single place to access compression and ++ * decompression. ++ */ ++ ++#include <linux/crypto.h> ++#include "ubifs.h" ++ ++/* Fake description object for the "none" compressor */ ++static struct ubifs_compressor none_compr = { ++ .compr_type = UBIFS_COMPR_NONE, ++ .name = "none", ++ .capi_name = "", ++}; ++ ++#ifdef CONFIG_UBIFS_FS_LZO ++static DEFINE_MUTEX(lzo_mutex); ++ ++static struct ubifs_compressor lzo_compr = { ++ .compr_type = UBIFS_COMPR_LZO, ++ .comp_mutex = &lzo_mutex, ++ .name = "lzo", ++ .capi_name = "lzo", ++}; ++#else ++static struct ubifs_compressor lzo_compr = { ++ .compr_type = UBIFS_COMPR_LZO, ++ .name = "lzo", ++}; ++#endif ++ ++#ifdef CONFIG_UBIFS_FS_ZLIB ++static DEFINE_MUTEX(deflate_mutex); ++static DEFINE_MUTEX(inflate_mutex); ++ ++static struct ubifs_compressor zlib_compr = { ++ .compr_type = UBIFS_COMPR_ZLIB, ++ .comp_mutex = &deflate_mutex, ++ .decomp_mutex = &inflate_mutex, ++ .name = "zlib", ++ .capi_name = "deflate", ++}; ++#else ++static struct ubifs_compressor zlib_compr = { ++ .compr_type = UBIFS_COMPR_ZLIB, ++ .name = "zlib", ++}; ++#endif ++ ++/* All UBIFS compressors, indexed by compression type */ ++struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT]; ++ ++/** ++ * ubifs_compress - compress data. 
++ * @in_buf: data to compress ++ * @in_len: length of the data to compress ++ * @out_buf: output buffer where compressed data should be stored ++ * @out_len: output buffer length is returned here ++ * @compr_type: type of compression to use on enter, actually used compression ++ * type on exit ++ * ++ * This function compresses input buffer @in_buf of length @in_len and stores ++ * the result in the output buffer @out_buf and the resulting length in ++ * @out_len. If the input buffer does not compress, it is just copied to the ++ * @out_buf. The same happens if @compr_type is %UBIFS_COMPR_NONE or if ++ * compression error occurred. ++ * ++ * Note, if the input buffer was not compressed, it is copied to the output ++ * buffer and %UBIFS_COMPR_NONE is returned in @compr_type. ++ */ ++void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len, ++ int *compr_type) ++{ ++ int err; ++ struct ubifs_compressor *compr = ubifs_compressors[*compr_type]; ++ ++ if (*compr_type == UBIFS_COMPR_NONE) ++ goto no_compr; ++ ++ /* If the input data is small, do not even try to compress it */ ++ if (in_len < UBIFS_MIN_COMPR_LEN) ++ goto no_compr; ++ ++ if (compr->comp_mutex) ++ mutex_lock(compr->comp_mutex); ++ err = crypto_comp_compress(compr->cc, in_buf, in_len, out_buf, ++ (unsigned int *)out_len); ++ if (compr->comp_mutex) ++ mutex_unlock(compr->comp_mutex); ++ if (unlikely(err)) { ++ ubifs_warn("cannot compress %d bytes, compressor %s, " ++ "error %d, leave data uncompressed", ++ in_len, compr->name, err); ++ goto no_compr; ++ } ++ ++ /* ++ * If the data compressed only slightly, it is better to leave it ++ * uncompressed to improve read speed. ++ */ ++ if (in_len - *out_len < UBIFS_MIN_COMPRESS_DIFF) ++ goto no_compr; ++ ++ return; ++ ++no_compr: ++ memcpy(out_buf, in_buf, in_len); ++ *out_len = in_len; ++ *compr_type = UBIFS_COMPR_NONE; ++} ++ ++/** ++ * ubifs_decompress - decompress data. 
++ * @in_buf: data to decompress ++ * @in_len: length of the data to decompress ++ * @out_buf: output buffer where decompressed data should be stored ++ * @out_len: output length is returned here ++ * @compr_type: type of compression ++ * ++ * This function decompresses data from buffer @in_buf into buffer @out_buf. ++ * The length of the uncompressed data is returned in @out_len. This function ++ * returns %0 on success or a negative error code on failure. ++ */ ++int ubifs_decompress(const void *in_buf, int in_len, void *out_buf, ++ int *out_len, int compr_type) ++{ ++ int err; ++ struct ubifs_compressor *compr; ++ ++ if (unlikely(compr_type < 0 || compr_type >= UBIFS_COMPR_TYPES_CNT)) { ++ ubifs_err("invalid compression type %d", compr_type); ++ return -EINVAL; ++ } ++ ++ compr = ubifs_compressors[compr_type]; ++ ++ if (unlikely(!compr->capi_name)) { ++ ubifs_err("%s compression is not compiled in", compr->name); ++ return -EINVAL; ++ } ++ ++ if (compr_type == UBIFS_COMPR_NONE) { ++ memcpy(out_buf, in_buf, in_len); ++ *out_len = in_len; ++ return 0; ++ } ++ ++ if (compr->decomp_mutex) ++ mutex_lock(compr->decomp_mutex); ++ err = crypto_comp_decompress(compr->cc, in_buf, in_len, out_buf, ++ (unsigned int *)out_len); ++ if (compr->decomp_mutex) ++ mutex_unlock(compr->decomp_mutex); ++ if (err) ++ ubifs_err("cannot decompress %d bytes, compressor %s, " ++ "error %d", in_len, compr->name, err); ++ ++ return err; ++} ++ ++/** ++ * compr_init - initialize a compressor. ++ * @compr: compressor description object ++ * ++ * This function initializes the requested compressor and returns zero in case ++ * of success or a negative error code in case of failure. 
++ */ ++static int __init compr_init(struct ubifs_compressor *compr) ++{ ++ if (compr->capi_name) { ++ compr->cc = crypto_alloc_comp(compr->capi_name, 0, 0); ++ if (IS_ERR(compr->cc)) { ++ ubifs_err("cannot initialize compressor %s, error %ld", ++ compr->name, PTR_ERR(compr->cc)); ++ return PTR_ERR(compr->cc); ++ } ++ } ++ ++ ubifs_compressors[compr->compr_type] = compr; ++ return 0; ++} ++ ++/** ++ * compr_exit - de-initialize a compressor. ++ * @compr: compressor description object ++ */ ++static void compr_exit(struct ubifs_compressor *compr) ++{ ++ if (compr->capi_name) ++ crypto_free_comp(compr->cc); ++ return; ++} ++ ++/** ++ * ubifs_compressors_init - initialize UBIFS compressors. ++ * ++ * This function initializes the compressors which were compiled in. Returns ++ * zero in case of success and a negative error code in case of failure. ++ */ ++int __init ubifs_compressors_init(void) ++{ ++ int err; ++ ++ err = compr_init(&lzo_compr); ++ if (err) ++ return err; ++ ++ err = compr_init(&zlib_compr); ++ if (err) ++ goto out_lzo; ++ ++ ubifs_compressors[UBIFS_COMPR_NONE] = &none_compr; ++ return 0; ++ ++out_lzo: ++ compr_exit(&lzo_compr); ++ return err; ++} ++ ++/** ++ * ubifs_compressors_exit - de-initialize UBIFS compressors. ++ */ ++void ubifs_compressors_exit(void) ++{ ++ compr_exit(&lzo_compr); ++ compr_exit(&zlib_compr); ++} +diff -Nurd linux-2.6.24/fs/ubifs/debug.c ubifs-v2.6.24/fs/ubifs/debug.c +--- linux-2.6.24/fs/ubifs/debug.c 1970-01-01 02:00:00.000000000 +0200 ++++ ubifs-v2.6.24/fs/ubifs/debug.c 2009-04-07 17:14:47.000000000 +0200 +@@ -0,0 +1,2603 @@ ++/* ++ * This file is part of UBIFS. ++ * ++ * Copyright (C) 2006-2008 Nokia Corporation ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published by ++ * the Free Software Foundation. 
++ * ++ * This program is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, write to the Free Software Foundation, Inc., 51 ++ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ * ++ * Authors: Artem Bityutskiy (Битюцкий Артём) ++ * Adrian Hunter ++ */ ++ ++/* ++ * This file implements most of the debugging stuff which is compiled in only ++ * when it is enabled. But some debugging check functions are implemented in ++ * the corresponding subsystems, just because they are closely related and utilize ++ * various local functions of those subsystems. ++ */ ++ ++#define UBIFS_DBG_PRESERVE_UBI ++ ++#include "ubifs.h" ++#include <linux/module.h> ++#include <linux/moduleparam.h> ++#include <linux/debugfs.h> ++ ++#ifdef CONFIG_UBIFS_FS_DEBUG ++ ++DEFINE_SPINLOCK(dbg_lock); ++ ++static char dbg_key_buf0[128]; ++static char dbg_key_buf1[128]; ++ ++unsigned int ubifs_msg_flags = UBIFS_MSG_FLAGS_DEFAULT; ++unsigned int ubifs_chk_flags = UBIFS_CHK_FLAGS_DEFAULT; ++unsigned int ubifs_tst_flags; ++ ++module_param_named(debug_msgs, ubifs_msg_flags, uint, S_IRUGO | S_IWUSR); ++module_param_named(debug_chks, ubifs_chk_flags, uint, S_IRUGO | S_IWUSR); ++module_param_named(debug_tsts, ubifs_tst_flags, uint, S_IRUGO | S_IWUSR); ++ ++MODULE_PARM_DESC(debug_msgs, "Debug message type flags"); ++MODULE_PARM_DESC(debug_chks, "Debug check flags"); ++MODULE_PARM_DESC(debug_tsts, "Debug special test flags"); ++ ++static const char *get_key_fmt(int fmt) ++{ ++ switch (fmt) { ++ case UBIFS_SIMPLE_KEY_FMT: ++ return "simple"; ++ default: ++ return "unknown/invalid format"; ++ } ++} ++ ++static const char *get_key_hash(int hash) ++{ ++ switch (hash) { ++ case UBIFS_KEY_HASH_R5: ++ return "R5"; ++ case 
UBIFS_KEY_HASH_TEST: ++ return "test"; ++ default: ++ return "unknown/invalid name hash"; ++ } ++} ++ ++static const char *get_key_type(int type) ++{ ++ switch (type) { ++ case UBIFS_INO_KEY: ++ return "inode"; ++ case UBIFS_DENT_KEY: ++ return "direntry"; ++ case UBIFS_XENT_KEY: ++ return "xentry"; ++ case UBIFS_DATA_KEY: ++ return "data"; ++ case UBIFS_TRUN_KEY: ++ return "truncate"; ++ default: ++ return "unknown/invalid key"; ++ } ++} ++ ++static void sprintf_key(const struct ubifs_info *c, const union ubifs_key *key, ++ char *buffer) ++{ ++ char *p = buffer; ++ int type = key_type(c, key); ++ ++ if (c->key_fmt == UBIFS_SIMPLE_KEY_FMT) { ++ switch (type) { ++ case UBIFS_INO_KEY: ++ sprintf(p, "(%lu, %s)", (unsigned long)key_inum(c, key), ++ get_key_type(type)); ++ break; ++ case UBIFS_DENT_KEY: ++ case UBIFS_XENT_KEY: ++ sprintf(p, "(%lu, %s, %#08x)", ++ (unsigned long)key_inum(c, key), ++ get_key_type(type), key_hash(c, key)); ++ break; ++ case UBIFS_DATA_KEY: ++ sprintf(p, "(%lu, %s, %u)", ++ (unsigned long)key_inum(c, key), ++ get_key_type(type), key_block(c, key)); ++ break; ++ case UBIFS_TRUN_KEY: ++ sprintf(p, "(%lu, %s)", ++ (unsigned long)key_inum(c, key), ++ get_key_type(type)); ++ break; ++ default: ++ sprintf(p, "(bad key type: %#08x, %#08x)", ++ key->u32[0], key->u32[1]); ++ } ++ } else ++ sprintf(p, "bad key format %d", c->key_fmt); ++} ++ ++const char *dbg_key_str0(const struct ubifs_info *c, const union ubifs_key *key) ++{ ++ /* dbg_lock must be held: the result lives in a shared static buffer */ ++ sprintf_key(c, key, dbg_key_buf0); ++ return dbg_key_buf0; ++} ++ ++const char *dbg_key_str1(const struct ubifs_info *c, const union ubifs_key *key) ++{ ++ /* dbg_lock must be held: the result lives in a shared static buffer */ ++ sprintf_key(c, key, dbg_key_buf1); ++ return dbg_key_buf1; ++} ++ ++const char *dbg_ntype(int type) ++{ ++ switch (type) { ++ case UBIFS_PAD_NODE: ++ return "padding node"; ++ case UBIFS_SB_NODE: ++ return "superblock node"; ++ case UBIFS_MST_NODE: ++ return "master node"; ++ case UBIFS_REF_NODE: ++ return 
"reference node"; ++ case UBIFS_INO_NODE: ++ return "inode node"; ++ case UBIFS_DENT_NODE: ++ return "direntry node"; ++ case UBIFS_XENT_NODE: ++ return "xentry node"; ++ case UBIFS_DATA_NODE: ++ return "data node"; ++ case UBIFS_TRUN_NODE: ++ return "truncate node"; ++ case UBIFS_IDX_NODE: ++ return "indexing node"; ++ case UBIFS_CS_NODE: ++ return "commit start node"; ++ case UBIFS_ORPH_NODE: ++ return "orphan node"; ++ default: ++ return "unknown node"; ++ } ++} ++ ++static const char *dbg_gtype(int type) ++{ ++ switch (type) { ++ case UBIFS_NO_NODE_GROUP: ++ return "no node group"; ++ case UBIFS_IN_NODE_GROUP: ++ return "in node group"; ++ case UBIFS_LAST_OF_NODE_GROUP: ++ return "last of node group"; ++ default: ++ return "unknown"; ++ } ++} ++ ++const char *dbg_cstate(int cmt_state) ++{ ++ switch (cmt_state) { ++ case COMMIT_RESTING: ++ return "commit resting"; ++ case COMMIT_BACKGROUND: ++ return "background commit requested"; ++ case COMMIT_REQUIRED: ++ return "commit required"; ++ case COMMIT_RUNNING_BACKGROUND: ++ return "BACKGROUND commit running"; ++ case COMMIT_RUNNING_REQUIRED: ++ return "commit running and required"; ++ case COMMIT_BROKEN: ++ return "broken commit"; ++ default: ++ return "unknown commit state"; ++ } ++} ++ ++static void dump_ch(const struct ubifs_ch *ch) ++{ ++ printk(KERN_DEBUG "\tmagic %#x\n", le32_to_cpu(ch->magic)); ++ printk(KERN_DEBUG "\tcrc %#x\n", le32_to_cpu(ch->crc)); ++ printk(KERN_DEBUG "\tnode_type %d (%s)\n", ch->node_type, ++ dbg_ntype(ch->node_type)); ++ printk(KERN_DEBUG "\tgroup_type %d (%s)\n", ch->group_type, ++ dbg_gtype(ch->group_type)); ++ printk(KERN_DEBUG "\tsqnum %llu\n", ++ (unsigned long long)le64_to_cpu(ch->sqnum)); ++ printk(KERN_DEBUG "\tlen %u\n", le32_to_cpu(ch->len)); ++} ++/* Dump VFS and UBIFS fields of in-memory @inode at KERN_DEBUG level. */ ++void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode) ++{ ++ const struct ubifs_inode *ui = ubifs_inode(inode); ++ ++ printk(KERN_DEBUG "Dump in-memory inode:"); ++ printk(KERN_DEBUG "\tinode %lu\n", 
inode->i_ino); ++ printk(KERN_DEBUG "\tsize %llu\n", ++ (unsigned long long)i_size_read(inode)); ++ printk(KERN_DEBUG "\tnlink %u\n", inode->i_nlink); ++ printk(KERN_DEBUG "\tuid %u\n", (unsigned int)inode->i_uid); ++ printk(KERN_DEBUG "\tgid %u\n", (unsigned int)inode->i_gid); ++ printk(KERN_DEBUG "\tatime %u.%u\n", ++ (unsigned int)inode->i_atime.tv_sec, ++ (unsigned int)inode->i_atime.tv_nsec); ++ printk(KERN_DEBUG "\tmtime %u.%u\n", ++ (unsigned int)inode->i_mtime.tv_sec, ++ (unsigned int)inode->i_mtime.tv_nsec); ++ printk(KERN_DEBUG "\tctime %u.%u\n", ++ (unsigned int)inode->i_ctime.tv_sec, ++ (unsigned int)inode->i_ctime.tv_nsec); ++ printk(KERN_DEBUG "\tcreat_sqnum %llu\n", ui->creat_sqnum); ++ printk(KERN_DEBUG "\txattr_size %u\n", ui->xattr_size); ++ printk(KERN_DEBUG "\txattr_cnt %u\n", ui->xattr_cnt); ++ printk(KERN_DEBUG "\txattr_names %u\n", ui->xattr_names); ++ printk(KERN_DEBUG "\tdirty %u\n", ui->dirty); ++ printk(KERN_DEBUG "\txattr %u\n", ui->xattr); ++ printk(KERN_DEBUG "\tbulk_read %u\n", ui->bulk_read); ++ printk(KERN_DEBUG "\tsynced_i_size %llu\n", ++ (unsigned long long)ui->synced_i_size); ++ printk(KERN_DEBUG "\tui_size %llu\n", ++ (unsigned long long)ui->ui_size); ++ printk(KERN_DEBUG "\tflags %d\n", ui->flags); ++ printk(KERN_DEBUG "\tcompr_type %d\n", ui->compr_type); ++ printk(KERN_DEBUG "\tlast_page_read %lu\n", ui->last_page_read); ++ printk(KERN_DEBUG "\tread_in_a_row %lu\n", ui->read_in_a_row); ++ printk(KERN_DEBUG "\tdata_len %d\n", ui->data_len); ++} ++ ++void dbg_dump_node(const struct ubifs_info *c, const void *node) ++{ ++ int i, n; ++ union ubifs_key key; ++ const struct ubifs_ch *ch = node; ++ ++ if (dbg_failure_mode) ++ return; ++ ++ /* If the magic is incorrect, just hexdump the first bytes */ ++ if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC) { ++ printk(KERN_DEBUG "Not a node, first %zu bytes:", UBIFS_CH_SZ); ++ print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, ++ (void *)node, UBIFS_CH_SZ, 1); ++ return; ++ } ++ ++ 
spin_lock(&dbg_lock); ++ dump_ch(node); ++ ++ switch (ch->node_type) { ++ case UBIFS_PAD_NODE: ++ { ++ const struct ubifs_pad_node *pad = node; ++ ++ printk(KERN_DEBUG "\tpad_len %u\n", ++ le32_to_cpu(pad->pad_len)); ++ break; ++ } ++ case UBIFS_SB_NODE: ++ { ++ const struct ubifs_sb_node *sup = node; ++ unsigned int sup_flags = le32_to_cpu(sup->flags); ++ ++ printk(KERN_DEBUG "\tkey_hash %d (%s)\n", ++ (int)sup->key_hash, get_key_hash(sup->key_hash)); ++ printk(KERN_DEBUG "\tkey_fmt %d (%s)\n", ++ (int)sup->key_fmt, get_key_fmt(sup->key_fmt)); ++ printk(KERN_DEBUG "\tflags %#x\n", sup_flags); ++ printk(KERN_DEBUG "\t big_lpt %u\n", ++ !!(sup_flags & UBIFS_FLG_BIGLPT)); ++ printk(KERN_DEBUG "\tmin_io_size %u\n", ++ le32_to_cpu(sup->min_io_size)); ++ printk(KERN_DEBUG "\tleb_size %u\n", ++ le32_to_cpu(sup->leb_size)); ++ printk(KERN_DEBUG "\tleb_cnt %u\n", ++ le32_to_cpu(sup->leb_cnt)); ++ printk(KERN_DEBUG "\tmax_leb_cnt %u\n", ++ le32_to_cpu(sup->max_leb_cnt)); ++ printk(KERN_DEBUG "\tmax_bud_bytes %llu\n", ++ (unsigned long long)le64_to_cpu(sup->max_bud_bytes)); ++ printk(KERN_DEBUG "\tlog_lebs %u\n", ++ le32_to_cpu(sup->log_lebs)); ++ printk(KERN_DEBUG "\tlpt_lebs %u\n", ++ le32_to_cpu(sup->lpt_lebs)); ++ printk(KERN_DEBUG "\torph_lebs %u\n", ++ le32_to_cpu(sup->orph_lebs)); ++ printk(KERN_DEBUG "\tjhead_cnt %u\n", ++ le32_to_cpu(sup->jhead_cnt)); ++ printk(KERN_DEBUG "\tfanout %u\n", ++ le32_to_cpu(sup->fanout)); ++ printk(KERN_DEBUG "\tlsave_cnt %u\n", ++ le32_to_cpu(sup->lsave_cnt)); ++ printk(KERN_DEBUG "\tdefault_compr %u\n", ++ (int)le16_to_cpu(sup->default_compr)); ++ printk(KERN_DEBUG "\trp_size %llu\n", ++ (unsigned long long)le64_to_cpu(sup->rp_size)); ++ printk(KERN_DEBUG "\trp_uid %u\n", ++ le32_to_cpu(sup->rp_uid)); ++ printk(KERN_DEBUG "\trp_gid %u\n", ++ le32_to_cpu(sup->rp_gid)); ++ printk(KERN_DEBUG "\tfmt_version %u\n", ++ le32_to_cpu(sup->fmt_version)); ++ printk(KERN_DEBUG "\ttime_gran %u\n", ++ le32_to_cpu(sup->time_gran)); ++ 
printk(KERN_DEBUG "\tUUID %02X%02X%02X%02X-%02X%02X" ++ "-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X\n", ++ sup->uuid[0], sup->uuid[1], sup->uuid[2], sup->uuid[3], ++ sup->uuid[4], sup->uuid[5], sup->uuid[6], sup->uuid[7], ++ sup->uuid[8], sup->uuid[9], sup->uuid[10], sup->uuid[11], ++ sup->uuid[12], sup->uuid[13], sup->uuid[14], ++ sup->uuid[15]); ++ break; ++ } ++ case UBIFS_MST_NODE: ++ { ++ const struct ubifs_mst_node *mst = node; ++ ++ printk(KERN_DEBUG "\thighest_inum %llu\n", ++ (unsigned long long)le64_to_cpu(mst->highest_inum)); ++ printk(KERN_DEBUG "\tcommit number %llu\n", ++ (unsigned long long)le64_to_cpu(mst->cmt_no)); ++ printk(KERN_DEBUG "\tflags %#x\n", ++ le32_to_cpu(mst->flags)); ++ printk(KERN_DEBUG "\tlog_lnum %u\n", ++ le32_to_cpu(mst->log_lnum)); ++ printk(KERN_DEBUG "\troot_lnum %u\n", ++ le32_to_cpu(mst->root_lnum)); ++ printk(KERN_DEBUG "\troot_offs %u\n", ++ le32_to_cpu(mst->root_offs)); ++ printk(KERN_DEBUG "\troot_len %u\n", ++ le32_to_cpu(mst->root_len)); ++ printk(KERN_DEBUG "\tgc_lnum %u\n", ++ le32_to_cpu(mst->gc_lnum)); ++ printk(KERN_DEBUG "\tihead_lnum %u\n", ++ le32_to_cpu(mst->ihead_lnum)); ++ printk(KERN_DEBUG "\tihead_offs %u\n", ++ le32_to_cpu(mst->ihead_offs)); ++ printk(KERN_DEBUG "\tindex_size %llu\n", ++ (unsigned long long)le64_to_cpu(mst->index_size)); ++ printk(KERN_DEBUG "\tlpt_lnum %u\n", ++ le32_to_cpu(mst->lpt_lnum)); ++ printk(KERN_DEBUG "\tlpt_offs %u\n", ++ le32_to_cpu(mst->lpt_offs)); ++ printk(KERN_DEBUG "\tnhead_lnum %u\n", ++ le32_to_cpu(mst->nhead_lnum)); ++ printk(KERN_DEBUG "\tnhead_offs %u\n", ++ le32_to_cpu(mst->nhead_offs)); ++ printk(KERN_DEBUG "\tltab_lnum %u\n", ++ le32_to_cpu(mst->ltab_lnum)); ++ printk(KERN_DEBUG "\tltab_offs %u\n", ++ le32_to_cpu(mst->ltab_offs)); ++ printk(KERN_DEBUG "\tlsave_lnum %u\n", ++ le32_to_cpu(mst->lsave_lnum)); ++ printk(KERN_DEBUG "\tlsave_offs %u\n", ++ le32_to_cpu(mst->lsave_offs)); ++ printk(KERN_DEBUG "\tlscan_lnum %u\n", ++ le32_to_cpu(mst->lscan_lnum)); ++ 
printk(KERN_DEBUG "\tleb_cnt %u\n", ++ le32_to_cpu(mst->leb_cnt)); ++ printk(KERN_DEBUG "\tempty_lebs %u\n", ++ le32_to_cpu(mst->empty_lebs)); ++ printk(KERN_DEBUG "\tidx_lebs %u\n", ++ le32_to_cpu(mst->idx_lebs)); ++ printk(KERN_DEBUG "\ttotal_free %llu\n", ++ (unsigned long long)le64_to_cpu(mst->total_free)); ++ printk(KERN_DEBUG "\ttotal_dirty %llu\n", ++ (unsigned long long)le64_to_cpu(mst->total_dirty)); ++ printk(KERN_DEBUG "\ttotal_used %llu\n", ++ (unsigned long long)le64_to_cpu(mst->total_used)); ++ printk(KERN_DEBUG "\ttotal_dead %llu\n", ++ (unsigned long long)le64_to_cpu(mst->total_dead)); ++ printk(KERN_DEBUG "\ttotal_dark %llu\n", ++ (unsigned long long)le64_to_cpu(mst->total_dark)); ++ break; ++ } ++ case UBIFS_REF_NODE: ++ { ++ const struct ubifs_ref_node *ref = node; ++ ++ printk(KERN_DEBUG "\tlnum %u\n", ++ le32_to_cpu(ref->lnum)); ++ printk(KERN_DEBUG "\toffs %u\n", ++ le32_to_cpu(ref->offs)); ++ printk(KERN_DEBUG "\tjhead %u\n", ++ le32_to_cpu(ref->jhead)); ++ break; ++ } ++ case UBIFS_INO_NODE: ++ { ++ const struct ubifs_ino_node *ino = node; ++ ++ key_read(c, &ino->key, &key); ++ printk(KERN_DEBUG "\tkey %s\n", DBGKEY(&key)); ++ printk(KERN_DEBUG "\tcreat_sqnum %llu\n", ++ (unsigned long long)le64_to_cpu(ino->creat_sqnum)); ++ printk(KERN_DEBUG "\tsize %llu\n", ++ (unsigned long long)le64_to_cpu(ino->size)); ++ printk(KERN_DEBUG "\tnlink %u\n", ++ le32_to_cpu(ino->nlink)); ++ printk(KERN_DEBUG "\tatime %lld.%u\n", ++ (long long)le64_to_cpu(ino->atime_sec), ++ le32_to_cpu(ino->atime_nsec)); ++ printk(KERN_DEBUG "\tmtime %lld.%u\n", ++ (long long)le64_to_cpu(ino->mtime_sec), ++ le32_to_cpu(ino->mtime_nsec)); ++ printk(KERN_DEBUG "\tctime %lld.%u\n", ++ (long long)le64_to_cpu(ino->ctime_sec), ++ le32_to_cpu(ino->ctime_nsec)); ++ printk(KERN_DEBUG "\tuid %u\n", ++ le32_to_cpu(ino->uid)); ++ printk(KERN_DEBUG "\tgid %u\n", ++ le32_to_cpu(ino->gid)); ++ printk(KERN_DEBUG "\tmode %u\n", ++ le32_to_cpu(ino->mode)); ++ printk(KERN_DEBUG "\tflags 
%#x\n", ++ le32_to_cpu(ino->flags)); ++ printk(KERN_DEBUG "\txattr_cnt %u\n", ++ le32_to_cpu(ino->xattr_cnt)); ++ printk(KERN_DEBUG "\txattr_size %u\n", ++ le32_to_cpu(ino->xattr_size)); ++ printk(KERN_DEBUG "\txattr_names %u\n", ++ le32_to_cpu(ino->xattr_names)); ++ printk(KERN_DEBUG "\tcompr_type %#x\n", ++ (int)le16_to_cpu(ino->compr_type)); ++ printk(KERN_DEBUG "\tdata len %u\n", ++ le32_to_cpu(ino->data_len)); ++ break; ++ } ++ case UBIFS_DENT_NODE: ++ case UBIFS_XENT_NODE: ++ { ++ const struct ubifs_dent_node *dent = node; ++ int nlen = le16_to_cpu(dent->nlen); ++ ++ key_read(c, &dent->key, &key); ++ printk(KERN_DEBUG "\tkey %s\n", DBGKEY(&key)); ++ printk(KERN_DEBUG "\tinum %llu\n", ++ (unsigned long long)le64_to_cpu(dent->inum)); ++ printk(KERN_DEBUG "\ttype %d\n", (int)dent->type); ++ printk(KERN_DEBUG "\tnlen %d\n", nlen); ++ printk(KERN_DEBUG "\tname "); ++ ++ if (nlen > UBIFS_MAX_NLEN) ++ printk(KERN_DEBUG "(bad name length, not printing, " ++ "bad or corrupted node)"); ++ else { ++ for (i = 0; i < nlen && dent->name[i]; i++) ++ printk(KERN_CONT "%c", dent->name[i]); ++ } ++ printk(KERN_CONT "\n"); ++ ++ break; ++ } ++ case UBIFS_DATA_NODE: ++ { ++ const struct ubifs_data_node *dn = node; ++ int dlen = le32_to_cpu(ch->len) - UBIFS_DATA_NODE_SZ; ++ ++ key_read(c, &dn->key, &key); ++ printk(KERN_DEBUG "\tkey %s\n", DBGKEY(&key)); ++ printk(KERN_DEBUG "\tsize %u\n", ++ le32_to_cpu(dn->size)); ++ printk(KERN_DEBUG "\tcompr_typ %d\n", ++ (int)le16_to_cpu(dn->compr_type)); ++ printk(KERN_DEBUG "\tdata size %d\n", ++ dlen); ++ printk(KERN_DEBUG "\tdata:\n"); ++ print_hex_dump(KERN_DEBUG, "\t", DUMP_PREFIX_OFFSET, 32, 1, ++ (void *)&dn->data, dlen, 0); ++ break; ++ } ++ case UBIFS_TRUN_NODE: ++ { ++ const struct ubifs_trun_node *trun = node; ++ ++ printk(KERN_DEBUG "\tinum %u\n", ++ le32_to_cpu(trun->inum)); ++ printk(KERN_DEBUG "\told_size %llu\n", ++ (unsigned long long)le64_to_cpu(trun->old_size)); ++ printk(KERN_DEBUG "\tnew_size %llu\n", ++ (unsigned long 
long)le64_to_cpu(trun->new_size)); ++ break; ++ } ++ case UBIFS_IDX_NODE: ++ { ++ const struct ubifs_idx_node *idx = node; ++ ++ n = le16_to_cpu(idx->child_cnt); ++ printk(KERN_DEBUG "\tchild_cnt %d\n", n); ++ printk(KERN_DEBUG "\tlevel %d\n", ++ (int)le16_to_cpu(idx->level)); ++ printk(KERN_DEBUG "\tBranches:\n"); ++ ++ for (i = 0; i < n && i < c->fanout - 1; i++) { ++ const struct ubifs_branch *br; ++ ++ br = ubifs_idx_branch(c, idx, i); ++ key_read(c, &br->key, &key); ++ printk(KERN_DEBUG "\t%d: LEB %d:%d len %d key %s\n", ++ i, le32_to_cpu(br->lnum), le32_to_cpu(br->offs), ++ le32_to_cpu(br->len), DBGKEY(&key)); ++ } ++ break; ++ } ++ case UBIFS_CS_NODE: ++ break; ++ case UBIFS_ORPH_NODE: ++ { ++ const struct ubifs_orph_node *orph = node; ++ ++ printk(KERN_DEBUG "\tcommit number %llu\n", ++ (unsigned long long) ++ le64_to_cpu(orph->cmt_no) & LLONG_MAX); ++ printk(KERN_DEBUG "\tlast node flag %llu\n", ++ (unsigned long long)(le64_to_cpu(orph->cmt_no)) >> 63); ++ n = (le32_to_cpu(ch->len) - UBIFS_ORPH_NODE_SZ) >> 3; ++ printk(KERN_DEBUG "\t%d orphan inode numbers:\n", n); ++ for (i = 0; i < n; i++) ++ printk(KERN_DEBUG "\t ino %llu\n", ++ (unsigned long long)le64_to_cpu(orph->inos[i])); ++ break; ++ } ++ default: ++ printk(KERN_DEBUG "node type %d was not recognized\n", ++ (int)ch->node_type); ++ } ++ spin_unlock(&dbg_lock); ++} ++ ++void dbg_dump_budget_req(const struct ubifs_budget_req *req) ++{ ++ spin_lock(&dbg_lock); ++ printk(KERN_DEBUG "Budgeting request: new_ino %d, dirtied_ino %d\n", ++ req->new_ino, req->dirtied_ino); ++ printk(KERN_DEBUG "\tnew_ino_d %d, dirtied_ino_d %d\n", ++ req->new_ino_d, req->dirtied_ino_d); ++ printk(KERN_DEBUG "\tnew_page %d, dirtied_page %d\n", ++ req->new_page, req->dirtied_page); ++ printk(KERN_DEBUG "\tnew_dent %d, mod_dent %d\n", ++ req->new_dent, req->mod_dent); ++ printk(KERN_DEBUG "\tidx_growth %d\n", req->idx_growth); ++ printk(KERN_DEBUG "\tdata_growth %d dd_growth %d\n", ++ req->data_growth, req->dd_growth); ++ 
spin_unlock(&dbg_lock); ++} ++ ++void dbg_dump_lstats(const struct ubifs_lp_stats *lst) ++{ ++ spin_lock(&dbg_lock); ++ printk(KERN_DEBUG "(pid %d) Lprops statistics: empty_lebs %d, " ++ "idx_lebs %d\n", current->pid, lst->empty_lebs, lst->idx_lebs); ++ printk(KERN_DEBUG "\ttaken_empty_lebs %d, total_free %lld, " ++ "total_dirty %lld\n", lst->taken_empty_lebs, lst->total_free, ++ lst->total_dirty); ++ printk(KERN_DEBUG "\ttotal_used %lld, total_dark %lld, " ++ "total_dead %lld\n", lst->total_used, lst->total_dark, ++ lst->total_dead); ++ spin_unlock(&dbg_lock); ++} ++ ++void dbg_dump_budg(struct ubifs_info *c) ++{ ++ int i; ++ struct rb_node *rb; ++ struct ubifs_bud *bud; ++ struct ubifs_gced_idx_leb *idx_gc; ++ long long available, outstanding, free; ++ ++ ubifs_assert(spin_is_locked(&c->space_lock)); ++ spin_lock(&dbg_lock); ++ printk(KERN_DEBUG "(pid %d) Budgeting info: budg_data_growth %lld, " ++ "budg_dd_growth %lld, budg_idx_growth %lld\n", current->pid, ++ c->budg_data_growth, c->budg_dd_growth, c->budg_idx_growth); ++ printk(KERN_DEBUG "\tdata budget sum %lld, total budget sum %lld, " ++ "freeable_cnt %d\n", c->budg_data_growth + c->budg_dd_growth, ++ c->budg_data_growth + c->budg_dd_growth + c->budg_idx_growth, ++ c->freeable_cnt); ++ printk(KERN_DEBUG "\tmin_idx_lebs %d, old_idx_sz %lld, " ++ "calc_idx_sz %lld, idx_gc_cnt %d\n", c->min_idx_lebs, ++ c->old_idx_sz, c->calc_idx_sz, c->idx_gc_cnt); ++ printk(KERN_DEBUG "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, " ++ "clean_zn_cnt %ld\n", atomic_long_read(&c->dirty_pg_cnt), ++ atomic_long_read(&c->dirty_zn_cnt), ++ atomic_long_read(&c->clean_zn_cnt)); ++ printk(KERN_DEBUG "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n", ++ c->dark_wm, c->dead_wm, c->max_idx_node_sz); ++ printk(KERN_DEBUG "\tgc_lnum %d, ihead_lnum %d\n", ++ c->gc_lnum, c->ihead_lnum); ++ /* If we are in R/O mode, journal heads do not exist */ ++ if (c->jheads) ++ for (i = 0; i < c->jhead_cnt; i++) ++ printk(KERN_DEBUG "\tjhead %d\t LEB %d\n", ++ 
c->jheads[i].wbuf.jhead, c->jheads[i].wbuf.lnum); ++ for (rb = rb_first(&c->buds); rb; rb = rb_next(rb)) { ++ bud = rb_entry(rb, struct ubifs_bud, rb); ++ printk(KERN_DEBUG "\tbud LEB %d\n", bud->lnum); ++ } ++ list_for_each_entry(bud, &c->old_buds, list) ++ printk(KERN_DEBUG "\told bud LEB %d\n", bud->lnum); ++ list_for_each_entry(idx_gc, &c->idx_gc, list) ++ printk(KERN_DEBUG "\tGC'ed idx LEB %d unmap %d\n", ++ idx_gc->lnum, idx_gc->unmap); ++ printk(KERN_DEBUG "\tcommit state %d\n", c->cmt_state); ++ ++ /* Print budgeting predictions */ ++ available = ubifs_calc_available(c, c->min_idx_lebs); ++ outstanding = c->budg_data_growth + c->budg_dd_growth; ++ free = ubifs_get_free_space_nolock(c); ++ printk(KERN_DEBUG "Budgeting predictions:\n"); ++ printk(KERN_DEBUG "\tavailable: %lld, outstanding %lld, free %lld\n", ++ available, outstanding, free); ++ spin_unlock(&dbg_lock); ++} ++ ++void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp) ++{ ++ printk(KERN_DEBUG "LEB %d lprops: free %d, dirty %d (used %d), " ++ "flags %#x\n", lp->lnum, lp->free, lp->dirty, ++ c->leb_size - lp->free - lp->dirty, lp->flags); ++} ++ ++void dbg_dump_lprops(struct ubifs_info *c) ++{ ++ int lnum, err; ++ struct ubifs_lprops lp; ++ struct ubifs_lp_stats lst; ++ ++ printk(KERN_DEBUG "(pid %d) start dumping LEB properties\n", ++ current->pid); ++ ubifs_get_lp_stats(c, &lst); ++ dbg_dump_lstats(&lst); ++ ++ for (lnum = c->main_first; lnum < c->leb_cnt; lnum++) { ++ err = ubifs_read_one_lp(c, lnum, &lp); ++ if (err) ++ ubifs_err("cannot read lprops for LEB %d", lnum); ++ ++ dbg_dump_lprop(c, &lp); ++ } ++ printk(KERN_DEBUG "(pid %d) finish dumping LEB properties\n", ++ current->pid); ++} ++ ++void dbg_dump_lpt_info(struct ubifs_info *c) ++{ ++ int i; ++ ++ spin_lock(&dbg_lock); ++ printk(KERN_DEBUG "(pid %d) dumping LPT information\n", current->pid); ++ printk(KERN_DEBUG "\tlpt_sz: %lld\n", c->lpt_sz); ++ printk(KERN_DEBUG "\tpnode_sz: %d\n", c->pnode_sz); ++ 
printk(KERN_DEBUG "\tnnode_sz: %d\n", c->nnode_sz); ++ printk(KERN_DEBUG "\tltab_sz: %d\n", c->ltab_sz); ++ printk(KERN_DEBUG "\tlsave_sz: %d\n", c->lsave_sz); ++ printk(KERN_DEBUG "\tbig_lpt: %d\n", c->big_lpt); ++ printk(KERN_DEBUG "\tlpt_hght: %d\n", c->lpt_hght); ++ printk(KERN_DEBUG "\tpnode_cnt: %d\n", c->pnode_cnt); ++ printk(KERN_DEBUG "\tnnode_cnt: %d\n", c->nnode_cnt); ++ printk(KERN_DEBUG "\tdirty_pn_cnt: %d\n", c->dirty_pn_cnt); ++ printk(KERN_DEBUG "\tdirty_nn_cnt: %d\n", c->dirty_nn_cnt); ++ printk(KERN_DEBUG "\tlsave_cnt: %d\n", c->lsave_cnt); ++ printk(KERN_DEBUG "\tspace_bits: %d\n", c->space_bits); ++ printk(KERN_DEBUG "\tlpt_lnum_bits: %d\n", c->lpt_lnum_bits); ++ printk(KERN_DEBUG "\tlpt_offs_bits: %d\n", c->lpt_offs_bits); ++ printk(KERN_DEBUG "\tlpt_spc_bits: %d\n", c->lpt_spc_bits); ++ printk(KERN_DEBUG "\tpcnt_bits: %d\n", c->pcnt_bits); ++ printk(KERN_DEBUG "\tlnum_bits: %d\n", c->lnum_bits); ++ printk(KERN_DEBUG "\tLPT root is at %d:%d\n", c->lpt_lnum, c->lpt_offs); ++ printk(KERN_DEBUG "\tLPT head is at %d:%d\n", ++ c->nhead_lnum, c->nhead_offs); ++ printk(KERN_DEBUG "\tLPT ltab is at %d:%d\n", ++ c->ltab_lnum, c->ltab_offs); ++ if (c->big_lpt) ++ printk(KERN_DEBUG "\tLPT lsave is at %d:%d\n", ++ c->lsave_lnum, c->lsave_offs); ++ for (i = 0; i < c->lpt_lebs; i++) ++ printk(KERN_DEBUG "\tLPT LEB %d free %d dirty %d tgc %d " ++ "cmt %d\n", i + c->lpt_first, c->ltab[i].free, ++ c->ltab[i].dirty, c->ltab[i].tgc, c->ltab[i].cmt); ++ spin_unlock(&dbg_lock); ++} ++ ++void dbg_dump_leb(const struct ubifs_info *c, int lnum) ++{ ++ struct ubifs_scan_leb *sleb; ++ struct ubifs_scan_node *snod; ++ ++ if (dbg_failure_mode) ++ return; ++ ++ printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", ++ current->pid, lnum); ++ sleb = ubifs_scan(c, lnum, 0, c->dbg->buf); ++ if (IS_ERR(sleb)) { ++ ubifs_err("scan error %d", (int)PTR_ERR(sleb)); ++ return; ++ } ++ ++ printk(KERN_DEBUG "LEB %d has %d nodes ending at %d\n", lnum, ++ sleb->nodes_cnt, sleb->endpt); 
++ ++ list_for_each_entry(snod, &sleb->nodes, list) { ++ cond_resched(); ++ printk(KERN_DEBUG "Dumping node at LEB %d:%d len %d\n", lnum, ++ snod->offs, snod->len); ++ dbg_dump_node(c, snod->node); ++ } ++ ++ printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n", ++ current->pid, lnum); ++ ubifs_scan_destroy(sleb); ++ return; ++} ++ ++void dbg_dump_znode(const struct ubifs_info *c, ++ const struct ubifs_znode *znode) ++{ ++ int n; ++ const struct ubifs_zbranch *zbr; ++ ++ spin_lock(&dbg_lock); ++ if (znode->parent) ++ zbr = &znode->parent->zbranch[znode->iip]; ++ else ++ zbr = &c->zroot; ++ ++ printk(KERN_DEBUG "znode %p, LEB %d:%d len %d parent %p iip %d level %d" ++ " child_cnt %d flags %lx\n", znode, zbr->lnum, zbr->offs, ++ zbr->len, znode->parent, znode->iip, znode->level, ++ znode->child_cnt, znode->flags); ++ ++ if (znode->child_cnt <= 0 || znode->child_cnt > c->fanout) { ++ spin_unlock(&dbg_lock); ++ return; ++ } ++ ++ printk(KERN_DEBUG "zbranches:\n"); ++ for (n = 0; n < znode->child_cnt; n++) { ++ zbr = &znode->zbranch[n]; ++ if (znode->level > 0) ++ printk(KERN_DEBUG "\t%d: znode %p LEB %d:%d len %d key " ++ "%s\n", n, zbr->znode, zbr->lnum, ++ zbr->offs, zbr->len, ++ DBGKEY(&zbr->key)); ++ else ++ printk(KERN_DEBUG "\t%d: LNC %p LEB %d:%d len %d key " ++ "%s\n", n, zbr->znode, zbr->lnum, ++ zbr->offs, zbr->len, ++ DBGKEY(&zbr->key)); ++ } ++ spin_unlock(&dbg_lock); ++} ++ ++void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat) ++{ ++ int i; ++ ++ printk(KERN_DEBUG "(pid %d) start dumping heap cat %d (%d elements)\n", ++ current->pid, cat, heap->cnt); ++ for (i = 0; i < heap->cnt; i++) { ++ struct ubifs_lprops *lprops = heap->arr[i]; ++ ++ printk(KERN_DEBUG "\t%d. 
LEB %d hpos %d free %d dirty %d " ++ "flags %d\n", i, lprops->lnum, lprops->hpos, ++ lprops->free, lprops->dirty, lprops->flags); ++ } ++ printk(KERN_DEBUG "(pid %d) finish dumping heap\n", current->pid); ++} ++ ++void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, ++ struct ubifs_nnode *parent, int iip) ++{ ++ int i; ++ ++ printk(KERN_DEBUG "(pid %d) dumping pnode:\n", current->pid); ++ printk(KERN_DEBUG "\taddress %zx parent %zx cnext %zx\n", ++ (size_t)pnode, (size_t)parent, (size_t)pnode->cnext); ++ printk(KERN_DEBUG "\tflags %lu iip %d level %d num %d\n", ++ pnode->flags, iip, pnode->level, pnode->num); ++ for (i = 0; i < UBIFS_LPT_FANOUT; i++) { ++ struct ubifs_lprops *lp = &pnode->lprops[i]; ++ ++ printk(KERN_DEBUG "\t%d: free %d dirty %d flags %d lnum %d\n", ++ i, lp->free, lp->dirty, lp->flags, lp->lnum); ++ } ++} ++ ++void dbg_dump_tnc(struct ubifs_info *c) ++{ ++ struct ubifs_znode *znode; ++ int level; ++ ++ printk(KERN_DEBUG "\n"); ++ printk(KERN_DEBUG "(pid %d) start dumping TNC tree\n", current->pid); ++ znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL); ++ level = znode->level; ++ printk(KERN_DEBUG "== Level %d ==\n", level); ++ while (znode) { ++ if (level != znode->level) { ++ level = znode->level; ++ printk(KERN_DEBUG "== Level %d ==\n", level); ++ } ++ dbg_dump_znode(c, znode); ++ znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode); ++ } ++ printk(KERN_DEBUG "(pid %d) finish dumping TNC tree\n", current->pid); ++} ++ ++static int dump_znode(struct ubifs_info *c, struct ubifs_znode *znode, ++ void *priv) ++{ ++ dbg_dump_znode(c, znode); ++ return 0; ++} ++ ++/** ++ * dbg_dump_index - dump the on-flash index. ++ * @c: UBIFS file-system description object ++ * ++ * This function dumps whole UBIFS indexing B-tree, unlike 'dbg_dump_tnc()' ++ * which dumps only in-memory znodes and does not read znodes which from flash. 
++ */ ++void dbg_dump_index(struct ubifs_info *c) ++{ ++ dbg_walk_index(c, NULL, dump_znode, NULL); ++} ++ ++/** ++ * dbg_save_space_info - save information about flash space. ++ * @c: UBIFS file-system description object ++ * ++ * This function saves information about UBIFS free space, dirty space, etc, in ++ * order to check it later. ++ */ ++void dbg_save_space_info(struct ubifs_info *c) ++{ ++ struct ubifs_debug_info *d = c->dbg; ++ ++ ubifs_get_lp_stats(c, &d->saved_lst); ++ ++ spin_lock(&c->space_lock); ++ d->saved_free = ubifs_get_free_space_nolock(c); ++ spin_unlock(&c->space_lock); ++} ++ ++/** ++ * dbg_check_space_info - check flash space information. ++ * @c: UBIFS file-system description object ++ * ++ * This function compares current flash space information with the information ++ * which was saved when the 'dbg_save_space_info()' function was called. ++ * Returns zero if the information has not changed, and %-EINVAL it it has ++ * changed. ++ */ ++int dbg_check_space_info(struct ubifs_info *c) ++{ ++ struct ubifs_debug_info *d = c->dbg; ++ struct ubifs_lp_stats lst; ++ long long avail, free; ++ ++ spin_lock(&c->space_lock); ++ avail = ubifs_calc_available(c, c->min_idx_lebs); ++ spin_unlock(&c->space_lock); ++ free = ubifs_get_free_space(c); ++ ++ if (free != d->saved_free) { ++ ubifs_err("free space changed from %lld to %lld", ++ d->saved_free, free); ++ goto out; ++ } ++ ++ return 0; ++ ++out: ++ ubifs_msg("saved lprops statistics dump"); ++ dbg_dump_lstats(&d->saved_lst); ++ ubifs_get_lp_stats(c, &lst); ++ ubifs_msg("current lprops statistics dump"); ++ dbg_dump_lstats(&d->saved_lst); ++ spin_lock(&c->space_lock); ++ dbg_dump_budg(c); ++ spin_unlock(&c->space_lock); ++ dump_stack(); ++ return -EINVAL; ++} ++ ++/** ++ * dbg_check_synced_i_size - check synchronized inode size. ++ * @inode: inode to check ++ * ++ * If inode is clean, synchronized inode size has to be equivalent to current ++ * inode size. 
This function has to be called only for locked inodes (@i_mutex ++ * has to be locked). Returns %0 if synchronized inode size if correct, and ++ * %-EINVAL if not. ++ */ ++int dbg_check_synced_i_size(struct inode *inode) ++{ ++ int err = 0; ++ struct ubifs_inode *ui = ubifs_inode(inode); ++ ++ if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) ++ return 0; ++ if (!S_ISREG(inode->i_mode)) ++ return 0; ++ ++ mutex_lock(&ui->ui_mutex); ++ spin_lock(&ui->ui_lock); ++ if (ui->ui_size != ui->synced_i_size && !ui->dirty) { ++ ubifs_err("ui_size is %lld, synced_i_size is %lld, but inode " ++ "is clean", ui->ui_size, ui->synced_i_size); ++ ubifs_err("i_ino %lu, i_mode %#x, i_size %lld", inode->i_ino, ++ inode->i_mode, i_size_read(inode)); ++ dbg_dump_stack(); ++ err = -EINVAL; ++ } ++ spin_unlock(&ui->ui_lock); ++ mutex_unlock(&ui->ui_mutex); ++ return err; ++} ++ ++/* ++ * dbg_check_dir - check directory inode size and link count. ++ * @c: UBIFS file-system description object ++ * @dir: the directory to calculate size for ++ * @size: the result is returned here ++ * ++ * This function makes sure that directory size and link count are correct. ++ * Returns zero in case of success and a negative error code in case of ++ * failure. ++ * ++ * Note, it is good idea to make sure the @dir->i_mutex is locked before ++ * calling this function. 
++ */ ++int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir) ++{ ++ unsigned int nlink = 2; ++ union ubifs_key key; ++ struct ubifs_dent_node *dent, *pdent = NULL; ++ struct qstr nm = { .name = NULL }; ++ loff_t size = UBIFS_INO_NODE_SZ; ++ ++ if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) ++ return 0; ++ ++ if (!S_ISDIR(dir->i_mode)) ++ return 0; ++ ++ lowest_dent_key(c, &key, dir->i_ino); ++ while (1) { ++ int err; ++ ++ dent = ubifs_tnc_next_ent(c, &key, &nm); ++ if (IS_ERR(dent)) { ++ err = PTR_ERR(dent); ++ if (err == -ENOENT) ++ break; ++ return err; ++ } ++ ++ nm.name = dent->name; ++ nm.len = le16_to_cpu(dent->nlen); ++ size += CALC_DENT_SIZE(nm.len); ++ if (dent->type == UBIFS_ITYPE_DIR) ++ nlink += 1; ++ kfree(pdent); ++ pdent = dent; ++ key_read(c, &dent->key, &key); ++ } ++ kfree(pdent); ++ ++ if (i_size_read(dir) != size) { ++ ubifs_err("directory inode %lu has size %llu, " ++ "but calculated size is %llu", dir->i_ino, ++ (unsigned long long)i_size_read(dir), ++ (unsigned long long)size); ++ dump_stack(); ++ return -EINVAL; ++ } ++ if (dir->i_nlink != nlink) { ++ ubifs_err("directory inode %lu has nlink %u, but calculated " ++ "nlink is %u", dir->i_ino, dir->i_nlink, nlink); ++ dump_stack(); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++/** ++ * dbg_check_key_order - make sure that colliding keys are properly ordered. ++ * @c: UBIFS file-system description object ++ * @zbr1: first zbranch ++ * @zbr2: following zbranch ++ * ++ * In UBIFS indexing B-tree colliding keys has to be sorted in binary order of ++ * names of the direntries/xentries which are referred by the keys. This ++ * function reads direntries/xentries referred by @zbr1 and @zbr2 and makes ++ * sure the name of direntry/xentry referred by @zbr1 is less than ++ * direntry/xentry referred by @zbr2. Returns zero if this is true, %1 if not, ++ * and a negative error code in case of failure. 
++ */ ++static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1, ++ struct ubifs_zbranch *zbr2) ++{ ++ int err, nlen1, nlen2, cmp; ++ struct ubifs_dent_node *dent1, *dent2; ++ union ubifs_key key; ++ ++ ubifs_assert(!keys_cmp(c, &zbr1->key, &zbr2->key)); ++ dent1 = kmalloc(UBIFS_MAX_DENT_NODE_SZ, GFP_NOFS); ++ if (!dent1) ++ return -ENOMEM; ++ dent2 = kmalloc(UBIFS_MAX_DENT_NODE_SZ, GFP_NOFS); ++ if (!dent2) { ++ err = -ENOMEM; ++ goto out_free; ++ } ++ ++ err = ubifs_tnc_read_node(c, zbr1, dent1); ++ if (err) ++ goto out_free; ++ err = ubifs_validate_entry(c, dent1); ++ if (err) ++ goto out_free; ++ ++ err = ubifs_tnc_read_node(c, zbr2, dent2); ++ if (err) ++ goto out_free; ++ err = ubifs_validate_entry(c, dent2); ++ if (err) ++ goto out_free; ++ ++ /* Make sure node keys are the same as in zbranch */ ++ err = 1; ++ key_read(c, &dent1->key, &key); ++ if (keys_cmp(c, &zbr1->key, &key)) { ++ dbg_err("1st entry at %d:%d has key %s", zbr1->lnum, ++ zbr1->offs, DBGKEY(&key)); ++ dbg_err("but it should have key %s according to tnc", ++ DBGKEY(&zbr1->key)); ++ dbg_dump_node(c, dent1); ++ goto out_free; ++ } ++ ++ key_read(c, &dent2->key, &key); ++ if (keys_cmp(c, &zbr2->key, &key)) { ++ dbg_err("2nd entry at %d:%d has key %s", zbr1->lnum, ++ zbr1->offs, DBGKEY(&key)); ++ dbg_err("but it should have key %s according to tnc", ++ DBGKEY(&zbr2->key)); ++ dbg_dump_node(c, dent2); ++ goto out_free; ++ } ++ ++ nlen1 = le16_to_cpu(dent1->nlen); ++ nlen2 = le16_to_cpu(dent2->nlen); ++ ++ cmp = memcmp(dent1->name, dent2->name, min_t(int, nlen1, nlen2)); ++ if (cmp < 0 || (cmp == 0 && nlen1 < nlen2)) { ++ err = 0; ++ goto out_free; ++ } ++ if (cmp == 0 && nlen1 == nlen2) ++ dbg_err("2 xent/dent nodes with the same name"); ++ else ++ dbg_err("bad order of colliding key %s", ++ DBGKEY(&key)); ++ ++ ubifs_msg("first node at %d:%d\n", zbr1->lnum, zbr1->offs); ++ dbg_dump_node(c, dent1); ++ ubifs_msg("second node at %d:%d\n", zbr2->lnum, zbr2->offs); ++ 
dbg_dump_node(c, dent2); ++ ++out_free: ++ kfree(dent2); ++ kfree(dent1); ++ return err; ++} ++ ++/** ++ * dbg_check_znode - check if znode is all right. ++ * @c: UBIFS file-system description object ++ * @zbr: zbranch which points to this znode ++ * ++ * This function makes sure that znode referred to by @zbr is all right. ++ * Returns zero if it is, and %-EINVAL if it is not. ++ */ ++static int dbg_check_znode(struct ubifs_info *c, struct ubifs_zbranch *zbr) ++{ ++ struct ubifs_znode *znode = zbr->znode; ++ struct ubifs_znode *zp = znode->parent; ++ int n, err, cmp; ++ ++ if (znode->child_cnt <= 0 || znode->child_cnt > c->fanout) { ++ err = 1; ++ goto out; ++ } ++ if (znode->level < 0) { ++ err = 2; ++ goto out; ++ } ++ if (znode->iip < 0 || znode->iip >= c->fanout) { ++ err = 3; ++ goto out; ++ } ++ ++ if (zbr->len == 0) ++ /* Only dirty zbranch may have no on-flash nodes */ ++ if (!ubifs_zn_dirty(znode)) { ++ err = 4; ++ goto out; ++ } ++ ++ if (ubifs_zn_dirty(znode)) { ++ /* ++ * If znode is dirty, its parent has to be dirty as well. The ++ * order of the operation is important, so we have to have ++ * memory barriers. ++ */ ++ smp_mb(); ++ if (zp && !ubifs_zn_dirty(zp)) { ++ /* ++ * The dirty flag is atomic and is cleared outside the ++ * TNC mutex, so znode's dirty flag may now have ++ * been cleared. The child is always cleared before the ++ * parent, so we just need to check again. 
++ */ ++ smp_mb(); ++ if (ubifs_zn_dirty(znode)) { ++ err = 5; ++ goto out; ++ } ++ } ++ } ++ ++ if (zp) { ++ const union ubifs_key *min, *max; ++ ++ if (znode->level != zp->level - 1) { ++ err = 6; ++ goto out; ++ } ++ ++ /* Make sure the 'parent' pointer in our znode is correct */ ++ err = ubifs_search_zbranch(c, zp, &zbr->key, &n); ++ if (!err) { ++ /* This zbranch does not exist in the parent */ ++ err = 7; ++ goto out; ++ } ++ ++ if (znode->iip >= zp->child_cnt) { ++ err = 8; ++ goto out; ++ } ++ ++ if (znode->iip != n) { ++ /* This may happen only in case of collisions */ ++ if (keys_cmp(c, &zp->zbranch[n].key, ++ &zp->zbranch[znode->iip].key)) { ++ err = 9; ++ goto out; ++ } ++ n = znode->iip; ++ } ++ ++ /* ++ * Make sure that the first key in our znode is greater than or ++ * equal to the key in the pointing zbranch. ++ */ ++ min = &zbr->key; ++ cmp = keys_cmp(c, min, &znode->zbranch[0].key); ++ if (cmp == 1) { ++ err = 10; ++ goto out; ++ } ++ ++ if (n + 1 < zp->child_cnt) { ++ max = &zp->zbranch[n + 1].key; ++ ++ /* ++ * Make sure the last key in our znode is less or ++ * equivalent than the key in the zbranch which goes ++ * after our pointing zbranch. ++ */ ++ cmp = keys_cmp(c, max, ++ &znode->zbranch[znode->child_cnt - 1].key); ++ if (cmp == -1) { ++ err = 11; ++ goto out; ++ } ++ } ++ } else { ++ /* This may only be root znode */ ++ if (zbr != &c->zroot) { ++ err = 12; ++ goto out; ++ } ++ } ++ ++ /* ++ * Make sure that next key is greater or equivalent then the previous ++ * one. 
++ */ ++ for (n = 1; n < znode->child_cnt; n++) { ++ cmp = keys_cmp(c, &znode->zbranch[n - 1].key, ++ &znode->zbranch[n].key); ++ if (cmp > 0) { ++ err = 13; ++ goto out; ++ } ++ if (cmp == 0) { ++ /* This can only be keys with colliding hash */ ++ if (!is_hash_key(c, &znode->zbranch[n].key)) { ++ err = 14; ++ goto out; ++ } ++ ++ if (znode->level != 0 || c->replaying) ++ continue; ++ ++ /* ++ * Colliding keys should follow binary order of ++ * corresponding xentry/dentry names. ++ */ ++ err = dbg_check_key_order(c, &znode->zbranch[n - 1], ++ &znode->zbranch[n]); ++ if (err < 0) ++ return err; ++ if (err) { ++ err = 15; ++ goto out; ++ } ++ } ++ } ++ ++ for (n = 0; n < znode->child_cnt; n++) { ++ if (!znode->zbranch[n].znode && ++ (znode->zbranch[n].lnum == 0 || ++ znode->zbranch[n].len == 0)) { ++ err = 16; ++ goto out; ++ } ++ ++ if (znode->zbranch[n].lnum != 0 && ++ znode->zbranch[n].len == 0) { ++ err = 17; ++ goto out; ++ } ++ ++ if (znode->zbranch[n].lnum == 0 && ++ znode->zbranch[n].len != 0) { ++ err = 18; ++ goto out; ++ } ++ ++ if (znode->zbranch[n].lnum == 0 && ++ znode->zbranch[n].offs != 0) { ++ err = 19; ++ goto out; ++ } ++ ++ if (znode->level != 0 && znode->zbranch[n].znode) ++ if (znode->zbranch[n].znode->parent != znode) { ++ err = 20; ++ goto out; ++ } ++ } ++ ++ return 0; ++ ++out: ++ ubifs_err("failed, error %d", err); ++ ubifs_msg("dump of the znode"); ++ dbg_dump_znode(c, znode); ++ if (zp) { ++ ubifs_msg("dump of the parent znode"); ++ dbg_dump_znode(c, zp); ++ } ++ dump_stack(); ++ return -EINVAL; ++} ++ ++/** ++ * dbg_check_tnc - check TNC tree. ++ * @c: UBIFS file-system description object ++ * @extra: do extra checks that are possible at start commit ++ * ++ * This function traverses whole TNC tree and checks every znode. Returns zero ++ * if everything is all right and %-EINVAL if something is wrong with TNC. 
++ */ ++int dbg_check_tnc(struct ubifs_info *c, int extra) ++{ ++ struct ubifs_znode *znode; ++ long clean_cnt = 0, dirty_cnt = 0; ++ int err, last; ++ ++ if (!(ubifs_chk_flags & UBIFS_CHK_TNC)) ++ return 0; ++ ++ ubifs_assert(mutex_is_locked(&c->tnc_mutex)); ++ if (!c->zroot.znode) ++ return 0; ++ ++ znode = ubifs_tnc_postorder_first(c->zroot.znode); ++ while (1) { ++ struct ubifs_znode *prev; ++ struct ubifs_zbranch *zbr; ++ ++ if (!znode->parent) ++ zbr = &c->zroot; ++ else ++ zbr = &znode->parent->zbranch[znode->iip]; ++ ++ err = dbg_check_znode(c, zbr); ++ if (err) ++ return err; ++ ++ if (extra) { ++ if (ubifs_zn_dirty(znode)) ++ dirty_cnt += 1; ++ else ++ clean_cnt += 1; ++ } ++ ++ prev = znode; ++ znode = ubifs_tnc_postorder_next(znode); ++ if (!znode) ++ break; ++ ++ /* ++ * If the last key of this znode is equivalent to the first key ++ * of the next znode (collision), then check order of the keys. ++ */ ++ last = prev->child_cnt - 1; ++ if (prev->level == 0 && znode->level == 0 && !c->replaying && ++ !keys_cmp(c, &prev->zbranch[last].key, ++ &znode->zbranch[0].key)) { ++ err = dbg_check_key_order(c, &prev->zbranch[last], ++ &znode->zbranch[0]); ++ if (err < 0) ++ return err; ++ if (err) { ++ ubifs_msg("first znode"); ++ dbg_dump_znode(c, prev); ++ ubifs_msg("second znode"); ++ dbg_dump_znode(c, znode); ++ return -EINVAL; ++ } ++ } ++ } ++ ++ if (extra) { ++ if (clean_cnt != atomic_long_read(&c->clean_zn_cnt)) { ++ ubifs_err("incorrect clean_zn_cnt %ld, calculated %ld", ++ atomic_long_read(&c->clean_zn_cnt), ++ clean_cnt); ++ return -EINVAL; ++ } ++ if (dirty_cnt != atomic_long_read(&c->dirty_zn_cnt)) { ++ ubifs_err("incorrect dirty_zn_cnt %ld, calculated %ld", ++ atomic_long_read(&c->dirty_zn_cnt), ++ dirty_cnt); ++ return -EINVAL; ++ } ++ } ++ ++ return 0; ++} ++ ++/** ++ * dbg_walk_index - walk the on-flash index. 
++ * @c: UBIFS file-system description object ++ * @leaf_cb: called for each leaf node ++ * @znode_cb: called for each indexing node ++ * @priv: private data which is passed to callbacks ++ * ++ * This function walks the UBIFS index and calls the @leaf_cb for each leaf ++ * node and @znode_cb for each indexing node. Returns zero in case of success ++ * and a negative error code in case of failure. ++ * ++ * It would be better if this function removed every znode it pulled into ++ * the TNC, so that the behavior more closely matched the non-debugging ++ * behavior. ++ */ ++int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb, ++ dbg_znode_callback znode_cb, void *priv) ++{ ++ int err; ++ struct ubifs_zbranch *zbr; ++ struct ubifs_znode *znode, *child; ++ ++ mutex_lock(&c->tnc_mutex); ++ /* If the root indexing node is not in TNC - pull it */ ++ if (!c->zroot.znode) { ++ c->zroot.znode = ubifs_load_znode(c, &c->zroot, NULL, 0); ++ if (IS_ERR(c->zroot.znode)) { ++ err = PTR_ERR(c->zroot.znode); ++ c->zroot.znode = NULL; ++ goto out_unlock; ++ } ++ } ++ ++ /* ++ * We are going to traverse the indexing tree in the postorder manner. ++ * Go down and find the leftmost indexing node where we are going to ++ * start from. 
++ */ ++ znode = c->zroot.znode; ++ while (znode->level > 0) { ++ zbr = &znode->zbranch[0]; ++ child = zbr->znode; ++ if (!child) { ++ child = ubifs_load_znode(c, zbr, znode, 0); ++ if (IS_ERR(child)) { ++ err = PTR_ERR(child); ++ goto out_unlock; ++ } ++ zbr->znode = child; ++ } ++ ++ znode = child; ++ } ++ ++ /* Iterate over all indexing nodes */ ++ while (1) { ++ int idx; ++ ++ cond_resched(); ++ ++ if (znode_cb) { ++ err = znode_cb(c, znode, priv); ++ if (err) { ++ ubifs_err("znode checking function returned " ++ "error %d", err); ++ dbg_dump_znode(c, znode); ++ goto out_dump; ++ } ++ } ++ if (leaf_cb && znode->level == 0) { ++ for (idx = 0; idx < znode->child_cnt; idx++) { ++ zbr = &znode->zbranch[idx]; ++ err = leaf_cb(c, zbr, priv); ++ if (err) { ++ ubifs_err("leaf checking function " ++ "returned error %d, for leaf " ++ "at LEB %d:%d", ++ err, zbr->lnum, zbr->offs); ++ goto out_dump; ++ } ++ } ++ } ++ ++ if (!znode->parent) ++ break; ++ ++ idx = znode->iip + 1; ++ znode = znode->parent; ++ if (idx < znode->child_cnt) { ++ /* Switch to the next index in the parent */ ++ zbr = &znode->zbranch[idx]; ++ child = zbr->znode; ++ if (!child) { ++ child = ubifs_load_znode(c, zbr, znode, idx); ++ if (IS_ERR(child)) { ++ err = PTR_ERR(child); ++ goto out_unlock; ++ } ++ zbr->znode = child; ++ } ++ znode = child; ++ } else ++ /* ++ * This is the last child, switch to the parent and ++ * continue. 
++ */ ++ continue; ++ ++ /* Go to the lowest leftmost znode in the new sub-tree */ ++ while (znode->level > 0) { ++ zbr = &znode->zbranch[0]; ++ child = zbr->znode; ++ if (!child) { ++ child = ubifs_load_znode(c, zbr, znode, 0); ++ if (IS_ERR(child)) { ++ err = PTR_ERR(child); ++ goto out_unlock; ++ } ++ zbr->znode = child; ++ } ++ znode = child; ++ } ++ } ++ ++ mutex_unlock(&c->tnc_mutex); ++ return 0; ++ ++out_dump: ++ if (znode->parent) ++ zbr = &znode->parent->zbranch[znode->iip]; ++ else ++ zbr = &c->zroot; ++ ubifs_msg("dump of znode at LEB %d:%d", zbr->lnum, zbr->offs); ++ dbg_dump_znode(c, znode); ++out_unlock: ++ mutex_unlock(&c->tnc_mutex); ++ return err; ++} ++ ++/** ++ * add_size - add znode size to partially calculated index size. ++ * @c: UBIFS file-system description object ++ * @znode: znode to add size for ++ * @priv: partially calculated index size ++ * ++ * This is a helper function for 'dbg_check_idx_size()' which is called for ++ * every indexing node and adds its size to the 'long long' variable pointed to ++ * by @priv. ++ */ ++static int add_size(struct ubifs_info *c, struct ubifs_znode *znode, void *priv) ++{ ++ long long *idx_size = priv; ++ int add; ++ ++ add = ubifs_idx_node_sz(c, znode->child_cnt); ++ add = ALIGN(add, 8); ++ *idx_size += add; ++ return 0; ++} ++ ++/** ++ * dbg_check_idx_size - check index size. ++ * @c: UBIFS file-system description object ++ * @idx_size: size to check ++ * ++ * This function walks the UBIFS index, calculates its size and checks that the ++ * size is equivalent to @idx_size. Returns zero in case of success and a ++ * negative error code in case of failure. 
++ */ ++int dbg_check_idx_size(struct ubifs_info *c, long long idx_size) ++{ ++ int err; ++ long long calc = 0; ++ ++ if (!(ubifs_chk_flags & UBIFS_CHK_IDX_SZ)) ++ return 0; ++ ++ err = dbg_walk_index(c, NULL, add_size, &calc); ++ if (err) { ++ ubifs_err("error %d while walking the index", err); ++ return err; ++ } ++ ++ if (calc != idx_size) { ++ ubifs_err("index size check failed: calculated size is %lld, " ++ "should be %lld", calc, idx_size); ++ dump_stack(); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++/** ++ * struct fsck_inode - information about an inode used when checking the file-system. ++ * @rb: link in the RB-tree of inodes ++ * @inum: inode number ++ * @mode: inode type, permissions, etc ++ * @nlink: inode link count ++ * @xattr_cnt: count of extended attributes ++ * @references: how many directory/xattr entries refer this inode (calculated ++ * while walking the index) ++ * @calc_cnt: for directory inode count of child directories ++ * @size: inode size (read from on-flash inode) ++ * @xattr_sz: summary size of all extended attributes (read from on-flash ++ * inode) ++ * @calc_sz: for directories calculated directory size ++ * @calc_xcnt: count of extended attributes ++ * @calc_xsz: calculated summary size of all extended attributes ++ * @xattr_nms: sum of lengths of all extended attribute names belonging to this ++ * inode (read from on-flash inode) ++ * @calc_xnms: calculated sum of lengths of all extended attribute names ++ */ ++struct fsck_inode { ++ struct rb_node rb; ++ ino_t inum; ++ umode_t mode; ++ unsigned int nlink; ++ unsigned int xattr_cnt; ++ int references; ++ int calc_cnt; ++ long long size; ++ unsigned int xattr_sz; ++ long long calc_sz; ++ long long calc_xcnt; ++ long long calc_xsz; ++ unsigned int xattr_nms; ++ long long calc_xnms; ++}; ++ ++/** ++ * struct fsck_data - private FS checking information. 
++ * @inodes: RB-tree of all inodes (contains @struct fsck_inode objects) ++ */ ++struct fsck_data { ++ struct rb_root inodes; ++}; ++ ++/** ++ * add_inode - add inode information to RB-tree of inodes. ++ * @c: UBIFS file-system description object ++ * @fsckd: FS checking information ++ * @ino: raw UBIFS inode to add ++ * ++ * This is a helper function for 'check_leaf()' which adds information about ++ * inode @ino to the RB-tree of inodes. Returns inode information pointer in ++ * case of success and a negative error code in case of failure. ++ */ ++static struct fsck_inode *add_inode(struct ubifs_info *c, ++ struct fsck_data *fsckd, ++ struct ubifs_ino_node *ino) ++{ ++ struct rb_node **p, *parent = NULL; ++ struct fsck_inode *fscki; ++ ino_t inum = key_inum_flash(c, &ino->key); ++ ++ p = &fsckd->inodes.rb_node; ++ while (*p) { ++ parent = *p; ++ fscki = rb_entry(parent, struct fsck_inode, rb); ++ if (inum < fscki->inum) ++ p = &(*p)->rb_left; ++ else if (inum > fscki->inum) ++ p = &(*p)->rb_right; ++ else ++ return fscki; ++ } ++ ++ if (inum > c->highest_inum) { ++ ubifs_err("too high inode number, max. is %lu", ++ (unsigned long)c->highest_inum); ++ return ERR_PTR(-EINVAL); ++ } ++ ++ fscki = kzalloc(sizeof(struct fsck_inode), GFP_NOFS); ++ if (!fscki) ++ return ERR_PTR(-ENOMEM); ++ ++ fscki->inum = inum; ++ fscki->nlink = le32_to_cpu(ino->nlink); ++ fscki->size = le64_to_cpu(ino->size); ++ fscki->xattr_cnt = le32_to_cpu(ino->xattr_cnt); ++ fscki->xattr_sz = le32_to_cpu(ino->xattr_size); ++ fscki->xattr_nms = le32_to_cpu(ino->xattr_names); ++ fscki->mode = le32_to_cpu(ino->mode); ++ if (S_ISDIR(fscki->mode)) { ++ fscki->calc_sz = UBIFS_INO_NODE_SZ; ++ fscki->calc_cnt = 2; ++ } ++ rb_link_node(&fscki->rb, parent, p); ++ rb_insert_color(&fscki->rb, &fsckd->inodes); ++ return fscki; ++} ++ ++/** ++ * search_inode - search inode in the RB-tree of inodes. 
++ * @fsckd: FS checking information ++ * @inum: inode number to search ++ * ++ * This is a helper function for 'check_leaf()' which searches inode @inum in ++ * the RB-tree of inodes and returns an inode information pointer or %NULL if ++ * the inode was not found. ++ */ ++static struct fsck_inode *search_inode(struct fsck_data *fsckd, ino_t inum) ++{ ++ struct rb_node *p; ++ struct fsck_inode *fscki; ++ ++ p = fsckd->inodes.rb_node; ++ while (p) { ++ fscki = rb_entry(p, struct fsck_inode, rb); ++ if (inum < fscki->inum) ++ p = p->rb_left; ++ else if (inum > fscki->inum) ++ p = p->rb_right; ++ else ++ return fscki; ++ } ++ return NULL; ++} ++ ++/** ++ * read_add_inode - read inode node and add it to RB-tree of inodes. ++ * @c: UBIFS file-system description object ++ * @fsckd: FS checking information ++ * @inum: inode number to read ++ * ++ * This is a helper function for 'check_leaf()' which finds inode node @inum in ++ * the index, reads it, and adds it to the RB-tree of inodes. Returns inode ++ * information pointer in case of success and a negative error code in case of ++ * failure. 
++ */ ++static struct fsck_inode *read_add_inode(struct ubifs_info *c, ++ struct fsck_data *fsckd, ino_t inum) ++{ ++ int n, err; ++ union ubifs_key key; ++ struct ubifs_znode *znode; ++ struct ubifs_zbranch *zbr; ++ struct ubifs_ino_node *ino; ++ struct fsck_inode *fscki; ++ ++ fscki = search_inode(fsckd, inum); ++ if (fscki) ++ return fscki; ++ ++ ino_key_init(c, &key, inum); ++ err = ubifs_lookup_level0(c, &key, &znode, &n); ++ if (!err) { ++ ubifs_err("inode %lu not found in index", (unsigned long)inum); ++ return ERR_PTR(-ENOENT); ++ } else if (err < 0) { ++ ubifs_err("error %d while looking up inode %lu", ++ err, (unsigned long)inum); ++ return ERR_PTR(err); ++ } ++ ++ zbr = &znode->zbranch[n]; ++ if (zbr->len < UBIFS_INO_NODE_SZ) { ++ ubifs_err("bad node %lu node length %d", ++ (unsigned long)inum, zbr->len); ++ return ERR_PTR(-EINVAL); ++ } ++ ++ ino = kmalloc(zbr->len, GFP_NOFS); ++ if (!ino) ++ return ERR_PTR(-ENOMEM); ++ ++ err = ubifs_tnc_read_node(c, zbr, ino); ++ if (err) { ++ ubifs_err("cannot read inode node at LEB %d:%d, error %d", ++ zbr->lnum, zbr->offs, err); ++ kfree(ino); ++ return ERR_PTR(err); ++ } ++ ++ fscki = add_inode(c, fsckd, ino); ++ kfree(ino); ++ if (IS_ERR(fscki)) { ++ ubifs_err("error %ld while adding inode %lu node", ++ PTR_ERR(fscki), (unsigned long)inum); ++ return fscki; ++ } ++ ++ return fscki; ++} ++ ++/** ++ * check_leaf - check leaf node. ++ * @c: UBIFS file-system description object ++ * @zbr: zbranch of the leaf node to check ++ * @priv: FS checking information ++ * ++ * This is a helper function for 'dbg_check_filesystem()' which is called for ++ * every single leaf node while walking the indexing tree. It checks that the ++ * leaf node referred from the indexing tree exists, has correct CRC, and does ++ * some other basic validation. This function is also responsible for building ++ * an RB-tree of inodes - it adds all inodes into the RB-tree. 
It also ++ * calculates reference count, size, etc for each inode in order to later ++ * compare them to the information stored inside the inodes and detect possible ++ * inconsistencies. Returns zero in case of success and a negative error code ++ * in case of failure. ++ */ ++static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr, ++ void *priv) ++{ ++ ino_t inum; ++ void *node; ++ int err, type = key_type(c, &zbr->key); ++ struct fsck_inode *fscki; ++ ++ if (zbr->len < UBIFS_CH_SZ) { ++ ubifs_err("bad leaf length %d (LEB %d:%d)", ++ zbr->len, zbr->lnum, zbr->offs); ++ return -EINVAL; ++ } ++ ++ node = kmalloc(zbr->len, GFP_NOFS); ++ if (!node) ++ return -ENOMEM; ++ ++ err = ubifs_tnc_read_node(c, zbr, node); ++ if (err) { ++ ubifs_err("cannot read leaf node at LEB %d:%d, error %d", ++ zbr->lnum, zbr->offs, err); ++ goto out_free; ++ } ++ ++ /* If this is an inode node, add it to RB-tree of inodes */ ++ if (type == UBIFS_INO_KEY) { ++ fscki = add_inode(c, priv, node); ++ if (IS_ERR(fscki)) { ++ err = PTR_ERR(fscki); ++ ubifs_err("error %d while adding inode node", err); ++ goto out_dump; ++ } ++ goto out; ++ } ++ ++ if (type != UBIFS_DENT_KEY && type != UBIFS_XENT_KEY && ++ type != UBIFS_DATA_KEY) { ++ ubifs_err("unexpected node type %d at LEB %d:%d", ++ type, zbr->lnum, zbr->offs); ++ err = -EINVAL; ++ goto out_free; ++ } ++ ++ if (type == UBIFS_DATA_KEY) { ++ long long blk_offs; ++ struct ubifs_data_node *dn = node; ++ ++ /* ++ * Search the inode node this data node belongs to and insert ++ * it to the RB-tree of inodes. 
++ */ ++ inum = key_inum_flash(c, &dn->key); ++ fscki = read_add_inode(c, priv, inum); ++ if (IS_ERR(fscki)) { ++ err = PTR_ERR(fscki); ++ ubifs_err("error %d while processing data node and " ++ "trying to find inode node %lu", ++ err, (unsigned long)inum); ++ goto out_dump; ++ } ++ ++ /* Make sure the data node is within inode size */ ++ blk_offs = key_block_flash(c, &dn->key); ++ blk_offs <<= UBIFS_BLOCK_SHIFT; ++ blk_offs += le32_to_cpu(dn->size); ++ if (blk_offs > fscki->size) { ++ ubifs_err("data node at LEB %d:%d is not within inode " ++ "size %lld", zbr->lnum, zbr->offs, ++ fscki->size); ++ err = -EINVAL; ++ goto out_dump; ++ } ++ } else { ++ int nlen; ++ struct ubifs_dent_node *dent = node; ++ struct fsck_inode *fscki1; ++ ++ err = ubifs_validate_entry(c, dent); ++ if (err) ++ goto out_dump; ++ ++ /* ++ * Search the inode node this entry refers to and the parent ++ * inode node and insert them to the RB-tree of inodes. ++ */ ++ inum = le64_to_cpu(dent->inum); ++ fscki = read_add_inode(c, priv, inum); ++ if (IS_ERR(fscki)) { ++ err = PTR_ERR(fscki); ++ ubifs_err("error %d while processing entry node and " ++ "trying to find inode node %lu", ++ err, (unsigned long)inum); ++ goto out_dump; ++ } ++ ++ /* Count how many direntries or xentries refers this inode */ ++ fscki->references += 1; ++ ++ inum = key_inum_flash(c, &dent->key); ++ fscki1 = read_add_inode(c, priv, inum); ++ if (IS_ERR(fscki1)) { ++ err = PTR_ERR(fscki); ++ ubifs_err("error %d while processing entry node and " ++ "trying to find parent inode node %lu", ++ err, (unsigned long)inum); ++ goto out_dump; ++ } ++ ++ nlen = le16_to_cpu(dent->nlen); ++ if (type == UBIFS_XENT_KEY) { ++ fscki1->calc_xcnt += 1; ++ fscki1->calc_xsz += CALC_DENT_SIZE(nlen); ++ fscki1->calc_xsz += CALC_XATTR_BYTES(fscki->size); ++ fscki1->calc_xnms += nlen; ++ } else { ++ fscki1->calc_sz += CALC_DENT_SIZE(nlen); ++ if (dent->type == UBIFS_ITYPE_DIR) ++ fscki1->calc_cnt += 1; ++ } ++ } ++ ++out: ++ kfree(node); ++ return 0; 
++ ++out_dump: ++ ubifs_msg("dump of node at LEB %d:%d", zbr->lnum, zbr->offs); ++ dbg_dump_node(c, node); ++out_free: ++ kfree(node); ++ return err; ++} ++ ++/** ++ * free_inodes - free RB-tree of inodes. ++ * @fsckd: FS checking information ++ */ ++static void free_inodes(struct fsck_data *fsckd) ++{ ++ struct rb_node *this = fsckd->inodes.rb_node; ++ struct fsck_inode *fscki; ++ ++ while (this) { ++ if (this->rb_left) ++ this = this->rb_left; ++ else if (this->rb_right) ++ this = this->rb_right; ++ else { ++ fscki = rb_entry(this, struct fsck_inode, rb); ++ this = rb_parent(this); ++ if (this) { ++ if (this->rb_left == &fscki->rb) ++ this->rb_left = NULL; ++ else ++ this->rb_right = NULL; ++ } ++ kfree(fscki); ++ } ++ } ++} ++ ++/** ++ * check_inodes - checks all inodes. ++ * @c: UBIFS file-system description object ++ * @fsckd: FS checking information ++ * ++ * This is a helper function for 'dbg_check_filesystem()' which walks the ++ * RB-tree of inodes after the index scan has been finished, and checks that ++ * inode nlink, size, etc are correct. Returns zero if inodes are fine, ++ * %-EINVAL if not, and a negative error code in case of failure. ++ */ ++static int check_inodes(struct ubifs_info *c, struct fsck_data *fsckd) ++{ ++ int n, err; ++ union ubifs_key key; ++ struct ubifs_znode *znode; ++ struct ubifs_zbranch *zbr; ++ struct ubifs_ino_node *ino; ++ struct fsck_inode *fscki; ++ struct rb_node *this = rb_first(&fsckd->inodes); ++ ++ while (this) { ++ fscki = rb_entry(this, struct fsck_inode, rb); ++ this = rb_next(this); ++ ++ if (S_ISDIR(fscki->mode)) { ++ /* ++ * Directories have to have exactly one reference (they ++ * cannot have hardlinks), although root inode is an ++ * exception. 
++ */ ++ if (fscki->inum != UBIFS_ROOT_INO && ++ fscki->references != 1) { ++ ubifs_err("directory inode %lu has %d " ++ "direntries which refer it, but " ++ "should be 1", ++ (unsigned long)fscki->inum, ++ fscki->references); ++ goto out_dump; ++ } ++ if (fscki->inum == UBIFS_ROOT_INO && ++ fscki->references != 0) { ++ ubifs_err("root inode %lu has non-zero (%d) " ++ "direntries which refer it", ++ (unsigned long)fscki->inum, ++ fscki->references); ++ goto out_dump; ++ } ++ if (fscki->calc_sz != fscki->size) { ++ ubifs_err("directory inode %lu size is %lld, " ++ "but calculated size is %lld", ++ (unsigned long)fscki->inum, ++ fscki->size, fscki->calc_sz); ++ goto out_dump; ++ } ++ if (fscki->calc_cnt != fscki->nlink) { ++ ubifs_err("directory inode %lu nlink is %d, " ++ "but calculated nlink is %d", ++ (unsigned long)fscki->inum, ++ fscki->nlink, fscki->calc_cnt); ++ goto out_dump; ++ } ++ } else { ++ if (fscki->references != fscki->nlink) { ++ ubifs_err("inode %lu nlink is %d, but " ++ "calculated nlink is %d", ++ (unsigned long)fscki->inum, ++ fscki->nlink, fscki->references); ++ goto out_dump; ++ } ++ } ++ if (fscki->xattr_sz != fscki->calc_xsz) { ++ ubifs_err("inode %lu has xattr size %u, but " ++ "calculated size is %lld", ++ (unsigned long)fscki->inum, fscki->xattr_sz, ++ fscki->calc_xsz); ++ goto out_dump; ++ } ++ if (fscki->xattr_cnt != fscki->calc_xcnt) { ++ ubifs_err("inode %lu has %u xattrs, but " ++ "calculated count is %lld", ++ (unsigned long)fscki->inum, ++ fscki->xattr_cnt, fscki->calc_xcnt); ++ goto out_dump; ++ } ++ if (fscki->xattr_nms != fscki->calc_xnms) { ++ ubifs_err("inode %lu has xattr names' size %u, but " ++ "calculated names' size is %lld", ++ (unsigned long)fscki->inum, fscki->xattr_nms, ++ fscki->calc_xnms); ++ goto out_dump; ++ } ++ } ++ ++ return 0; ++ ++out_dump: ++ /* Read the bad inode and dump it */ ++ ino_key_init(c, &key, fscki->inum); ++ err = ubifs_lookup_level0(c, &key, &znode, &n); ++ if (!err) { ++ ubifs_err("inode %lu 
not found in index", ++ (unsigned long)fscki->inum); ++ return -ENOENT; ++ } else if (err < 0) { ++ ubifs_err("error %d while looking up inode %lu", ++ err, (unsigned long)fscki->inum); ++ return err; ++ } ++ ++ zbr = &znode->zbranch[n]; ++ ino = kmalloc(zbr->len, GFP_NOFS); ++ if (!ino) ++ return -ENOMEM; ++ ++ err = ubifs_tnc_read_node(c, zbr, ino); ++ if (err) { ++ ubifs_err("cannot read inode node at LEB %d:%d, error %d", ++ zbr->lnum, zbr->offs, err); ++ kfree(ino); ++ return err; ++ } ++ ++ ubifs_msg("dump of the inode %lu sitting in LEB %d:%d", ++ (unsigned long)fscki->inum, zbr->lnum, zbr->offs); ++ dbg_dump_node(c, ino); ++ kfree(ino); ++ return -EINVAL; ++} ++ ++/** ++ * dbg_check_filesystem - check the file-system. ++ * @c: UBIFS file-system description object ++ * ++ * This function checks the file system, namely: ++ * o makes sure that all leaf nodes exist and their CRCs are correct; ++ * o makes sure inode nlink, size, xattr size/count are correct (for all ++ * inodes). ++ * ++ * The function reads whole indexing tree and all nodes, so it is pretty ++ * heavy-weight. Returns zero if the file-system is consistent, %-EINVAL if ++ * not, and a negative error code in case of failure. 
++ */ ++int dbg_check_filesystem(struct ubifs_info *c) ++{ ++ int err; ++ struct fsck_data fsckd; ++ ++ if (!(ubifs_chk_flags & UBIFS_CHK_FS)) ++ return 0; ++ ++ fsckd.inodes = RB_ROOT; ++ err = dbg_walk_index(c, check_leaf, NULL, &fsckd); ++ if (err) ++ goto out_free; ++ ++ err = check_inodes(c, &fsckd); ++ if (err) ++ goto out_free; ++ ++ free_inodes(&fsckd); ++ return 0; ++ ++out_free: ++ ubifs_err("file-system check failed with error %d", err); ++ dump_stack(); ++ free_inodes(&fsckd); ++ return err; ++} ++ ++static int invocation_cnt; ++ ++int dbg_force_in_the_gaps(void) ++{ ++ if (!dbg_force_in_the_gaps_enabled) ++ return 0; ++ /* Force in-the-gaps every 8th commit */ ++ return !((invocation_cnt++) & 0x7); ++} ++ ++/* Failure mode for recovery testing */ ++ ++#define chance(n, d) (simple_rand() <= (n) * 32768LL / (d)) ++ ++struct failure_mode_info { ++ struct list_head list; ++ struct ubifs_info *c; ++}; ++ ++static LIST_HEAD(fmi_list); ++static DEFINE_SPINLOCK(fmi_lock); ++ ++static unsigned int next; ++ ++static int simple_rand(void) ++{ ++ if (next == 0) ++ next = current->pid; ++ next = next * 1103515245 + 12345; ++ return (next >> 16) & 32767; ++} ++ ++static void failure_mode_init(struct ubifs_info *c) ++{ ++ struct failure_mode_info *fmi; ++ ++ fmi = kmalloc(sizeof(struct failure_mode_info), GFP_NOFS); ++ if (!fmi) { ++ ubifs_err("Failed to register failure mode - no memory"); ++ return; ++ } ++ fmi->c = c; ++ spin_lock(&fmi_lock); ++ list_add_tail(&fmi->list, &fmi_list); ++ spin_unlock(&fmi_lock); ++} ++ ++static void failure_mode_exit(struct ubifs_info *c) ++{ ++ struct failure_mode_info *fmi, *tmp; ++ ++ spin_lock(&fmi_lock); ++ list_for_each_entry_safe(fmi, tmp, &fmi_list, list) ++ if (fmi->c == c) { ++ list_del(&fmi->list); ++ kfree(fmi); ++ } ++ spin_unlock(&fmi_lock); ++} ++ ++static struct ubifs_info *dbg_find_info(struct ubi_volume_desc *desc) ++{ ++ struct failure_mode_info *fmi; ++ ++ spin_lock(&fmi_lock); ++ list_for_each_entry(fmi, 
&fmi_list, list) ++ if (fmi->c->ubi == desc) { ++ struct ubifs_info *c = fmi->c; ++ ++ spin_unlock(&fmi_lock); ++ return c; ++ } ++ spin_unlock(&fmi_lock); ++ return NULL; ++} ++ ++static int in_failure_mode(struct ubi_volume_desc *desc) ++{ ++ struct ubifs_info *c = dbg_find_info(desc); ++ ++ if (c && dbg_failure_mode) ++ return c->dbg->failure_mode; ++ return 0; ++} ++ ++static int do_fail(struct ubi_volume_desc *desc, int lnum, int write) ++{ ++ struct ubifs_info *c = dbg_find_info(desc); ++ struct ubifs_debug_info *d; ++ ++ if (!c || !dbg_failure_mode) ++ return 0; ++ d = c->dbg; ++ if (d->failure_mode) ++ return 1; ++ if (!d->fail_cnt) { ++ /* First call - decide delay to failure */ ++ if (chance(1, 2)) { ++ unsigned int delay = 1 << (simple_rand() >> 11); ++ ++ if (chance(1, 2)) { ++ d->fail_delay = 1; ++ d->fail_timeout = jiffies + ++ msecs_to_jiffies(delay); ++ dbg_rcvry("failing after %ums", delay); ++ } else { ++ d->fail_delay = 2; ++ d->fail_cnt_max = delay; ++ dbg_rcvry("failing after %u calls", delay); ++ } ++ } ++ d->fail_cnt += 1; ++ } ++ /* Determine if failure delay has expired */ ++ if (d->fail_delay == 1) { ++ if (time_before(jiffies, d->fail_timeout)) ++ return 0; ++ } else if (d->fail_delay == 2) ++ if (d->fail_cnt++ < d->fail_cnt_max) ++ return 0; ++ if (lnum == UBIFS_SB_LNUM) { ++ if (write) { ++ if (chance(1, 2)) ++ return 0; ++ } else if (chance(19, 20)) ++ return 0; ++ dbg_rcvry("failing in super block LEB %d", lnum); ++ } else if (lnum == UBIFS_MST_LNUM || lnum == UBIFS_MST_LNUM + 1) { ++ if (chance(19, 20)) ++ return 0; ++ dbg_rcvry("failing in master LEB %d", lnum); ++ } else if (lnum >= UBIFS_LOG_LNUM && lnum <= c->log_last) { ++ if (write) { ++ if (chance(99, 100)) ++ return 0; ++ } else if (chance(399, 400)) ++ return 0; ++ dbg_rcvry("failing in log LEB %d", lnum); ++ } else if (lnum >= c->lpt_first && lnum <= c->lpt_last) { ++ if (write) { ++ if (chance(7, 8)) ++ return 0; ++ } else if (chance(19, 20)) ++ return 0; ++ 
dbg_rcvry("failing in LPT LEB %d", lnum); ++ } else if (lnum >= c->orph_first && lnum <= c->orph_last) { ++ if (write) { ++ if (chance(1, 2)) ++ return 0; ++ } else if (chance(9, 10)) ++ return 0; ++ dbg_rcvry("failing in orphan LEB %d", lnum); ++ } else if (lnum == c->ihead_lnum) { ++ if (chance(99, 100)) ++ return 0; ++ dbg_rcvry("failing in index head LEB %d", lnum); ++ } else if (c->jheads && lnum == c->jheads[GCHD].wbuf.lnum) { ++ if (chance(9, 10)) ++ return 0; ++ dbg_rcvry("failing in GC head LEB %d", lnum); ++ } else if (write && !RB_EMPTY_ROOT(&c->buds) && ++ !ubifs_search_bud(c, lnum)) { ++ if (chance(19, 20)) ++ return 0; ++ dbg_rcvry("failing in non-bud LEB %d", lnum); ++ } else if (c->cmt_state == COMMIT_RUNNING_BACKGROUND || ++ c->cmt_state == COMMIT_RUNNING_REQUIRED) { ++ if (chance(999, 1000)) ++ return 0; ++ dbg_rcvry("failing in bud LEB %d commit running", lnum); ++ } else { ++ if (chance(9999, 10000)) ++ return 0; ++ dbg_rcvry("failing in bud LEB %d commit not running", lnum); ++ } ++ ubifs_err("*** SETTING FAILURE MODE ON (LEB %d) ***", lnum); ++ d->failure_mode = 1; ++ dump_stack(); ++ return 1; ++} ++ ++static void cut_data(const void *buf, int len) ++{ ++ int flen, i; ++ unsigned char *p = (void *)buf; ++ ++ flen = (len * (long long)simple_rand()) >> 15; ++ for (i = flen; i < len; i++) ++ p[i] = 0xff; ++} ++ ++int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, ++ int len, int check) ++{ ++ if (in_failure_mode(desc)) ++ return -EIO; ++ return ubi_leb_read(desc, lnum, buf, offset, len, check); ++} ++ ++int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf, ++ int offset, int len, int dtype) ++{ ++ int err, failing; ++ ++ if (in_failure_mode(desc)) ++ return -EIO; ++ failing = do_fail(desc, lnum, 1); ++ if (failing) ++ cut_data(buf, len); ++ err = ubi_leb_write(desc, lnum, buf, offset, len, dtype); ++ if (err) ++ return err; ++ if (failing) ++ return -EIO; ++ return 0; ++} ++ ++int 
dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf, ++ int len, int dtype) ++{ ++ int err; ++ ++ if (do_fail(desc, lnum, 1)) ++ return -EIO; ++ err = ubi_leb_change(desc, lnum, buf, len, dtype); ++ if (err) ++ return err; ++ if (do_fail(desc, lnum, 1)) ++ return -EIO; ++ return 0; ++} ++ ++int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum) ++{ ++ int err; ++ ++ if (do_fail(desc, lnum, 0)) ++ return -EIO; ++ err = ubi_leb_erase(desc, lnum); ++ if (err) ++ return err; ++ if (do_fail(desc, lnum, 0)) ++ return -EIO; ++ return 0; ++} ++ ++int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum) ++{ ++ int err; ++ ++ if (do_fail(desc, lnum, 0)) ++ return -EIO; ++ err = ubi_leb_unmap(desc, lnum); ++ if (err) ++ return err; ++ if (do_fail(desc, lnum, 0)) ++ return -EIO; ++ return 0; ++} ++ ++int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum) ++{ ++ if (in_failure_mode(desc)) ++ return -EIO; ++ return ubi_is_mapped(desc, lnum); ++} ++ ++int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype) ++{ ++ int err; ++ ++ if (do_fail(desc, lnum, 0)) ++ return -EIO; ++ err = ubi_leb_map(desc, lnum, dtype); ++ if (err) ++ return err; ++ if (do_fail(desc, lnum, 0)) ++ return -EIO; ++ return 0; ++} ++ ++/** ++ * ubifs_debugging_init - initialize UBIFS debugging. ++ * @c: UBIFS file-system description object ++ * ++ * This function initializes debugging-related data for the file system. ++ * Returns zero in case of success and a negative error code in case of ++ * failure. ++ */ ++int ubifs_debugging_init(struct ubifs_info *c) ++{ ++ c->dbg = kzalloc(sizeof(struct ubifs_debug_info), GFP_KERNEL); ++ if (!c->dbg) ++ return -ENOMEM; ++ ++ c->dbg->buf = vmalloc(c->leb_size); ++ if (!c->dbg->buf) ++ goto out; ++ ++ failure_mode_init(c); ++ return 0; ++ ++out: ++ kfree(c->dbg); ++ return -ENOMEM; ++} ++ ++/** ++ * ubifs_debugging_exit - free debugging data. 
++ * @c: UBIFS file-system description object ++ */ ++void ubifs_debugging_exit(struct ubifs_info *c) ++{ ++ failure_mode_exit(c); ++ vfree(c->dbg->buf); ++ kfree(c->dbg); ++} ++ ++/* ++ * Root directory for UBIFS stuff in debugfs. Contains sub-directories which ++ * contain the stuff specific to particular file-system mounts. ++ */ ++static struct dentry *dfs_rootdir; ++ ++/** ++ * dbg_debugfs_init - initialize debugfs file-system. ++ * ++ * UBIFS uses debugfs file-system to expose various debugging knobs to ++ * user-space. This function creates "ubifs" directory in the debugfs ++ * file-system. Returns zero in case of success and a negative error code in ++ * case of failure. ++ */ ++int dbg_debugfs_init(void) ++{ ++ dfs_rootdir = debugfs_create_dir("ubifs", NULL); ++ if (IS_ERR(dfs_rootdir)) { ++ int err = PTR_ERR(dfs_rootdir); ++ ubifs_err("cannot create \"ubifs\" debugfs directory, " ++ "error %d\n", err); ++ return err; ++ } ++ ++ return 0; ++} ++ ++/** ++ * dbg_debugfs_exit - remove the "ubifs" directory from debugfs file-system. 
++ */ ++void dbg_debugfs_exit(void) ++{ ++ debugfs_remove(dfs_rootdir); ++} ++ ++static int open_debugfs_file(struct inode *inode, struct file *file) ++{ ++ file->private_data = inode->i_private; ++ return 0; ++} ++ ++static ssize_t write_debugfs_file(struct file *file, const char __user *buf, ++ size_t count, loff_t *ppos) ++{ ++ struct ubifs_info *c = file->private_data; ++ struct ubifs_debug_info *d = c->dbg; ++ ++ if (file->f_path.dentry == d->dfs_dump_lprops) ++ dbg_dump_lprops(c); ++ else if (file->f_path.dentry == d->dfs_dump_budg) { ++ spin_lock(&c->space_lock); ++ dbg_dump_budg(c); ++ spin_unlock(&c->space_lock); ++ } else if (file->f_path.dentry == d->dfs_dump_tnc) { ++ mutex_lock(&c->tnc_mutex); ++ dbg_dump_tnc(c); ++ mutex_unlock(&c->tnc_mutex); ++ } else ++ return -EINVAL; ++ ++ *ppos += count; ++ return count; ++} ++ ++static const struct file_operations dfs_fops = { ++ .open = open_debugfs_file, ++ .write = write_debugfs_file, ++ .owner = THIS_MODULE, ++}; ++ ++/** ++ * dbg_debugfs_init_fs - initialize debugfs for UBIFS instance. ++ * @c: UBIFS file-system description object ++ * ++ * This function creates all debugfs files for this instance of UBIFS. Returns ++ * zero in case of success and a negative error code in case of failure. ++ * ++ * Note, the only reason we have not merged this function with the ++ * 'ubifs_debugging_init()' function is because it is better to initialize ++ * debugfs interfaces at the very end of the mount process, and remove them at ++ * the very beginning of the mount process. 
++ */ ++int dbg_debugfs_init_fs(struct ubifs_info *c) ++{ ++ int err; ++ const char *fname; ++ struct dentry *dent; ++ struct ubifs_debug_info *d = c->dbg; ++ ++ sprintf(d->dfs_dir_name, "ubi%d_%d", c->vi.ubi_num, c->vi.vol_id); ++ d->dfs_dir = debugfs_create_dir(d->dfs_dir_name, dfs_rootdir); ++ if (IS_ERR(d->dfs_dir)) { ++ err = PTR_ERR(d->dfs_dir); ++ ubifs_err("cannot create \"%s\" debugfs directory, error %d\n", ++ d->dfs_dir_name, err); ++ goto out; ++ } ++ ++ fname = "dump_lprops"; ++ dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops); ++ if (IS_ERR(dent)) ++ goto out_remove; ++ d->dfs_dump_lprops = dent; ++ ++ fname = "dump_budg"; ++ dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops); ++ if (IS_ERR(dent)) ++ goto out_remove; ++ d->dfs_dump_budg = dent; ++ ++ fname = "dump_tnc"; ++ dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops); ++ if (IS_ERR(dent)) ++ goto out_remove; ++ d->dfs_dump_tnc = dent; ++ ++ return 0; ++ ++out_remove: ++ err = PTR_ERR(dent); ++ ubifs_err("cannot create \"%s\" debugfs directory, error %d\n", ++ fname, err); ++ if (d->dfs_dump_tnc) ++ debugfs_remove(d->dfs_dump_tnc); ++ if (d->dfs_dump_budg) ++ debugfs_remove(d->dfs_dump_budg); ++ if (d->dfs_dump_lprops) ++ debugfs_remove(d->dfs_dump_lprops); ++ debugfs_remove(d->dfs_dir); ++out: ++ return err; ++} ++ ++/** ++ * dbg_debugfs_exit_fs - remove all debugfs files. 
++ * @c: UBIFS file-system description object ++ */ ++void dbg_debugfs_exit_fs(struct ubifs_info *c) ++{ ++ struct ubifs_debug_info *d = c->dbg; ++ ++ debugfs_remove(d->dfs_dump_tnc); ++ debugfs_remove(d->dfs_dump_budg); ++ debugfs_remove(d->dfs_dump_lprops); ++ debugfs_remove(d->dfs_dir); ++} ++ ++#endif /* CONFIG_UBIFS_FS_DEBUG */ +diff -Nurd linux-2.6.24/fs/ubifs/debug.h ubifs-v2.6.24/fs/ubifs/debug.h +--- linux-2.6.24/fs/ubifs/debug.h 1970-01-01 02:00:00.000000000 +0200 ++++ ubifs-v2.6.24/fs/ubifs/debug.h 2009-04-07 17:14:47.000000000 +0200 +@@ -0,0 +1,507 @@ ++/* ++ * This file is part of UBIFS. ++ * ++ * Copyright (C) 2006-2008 Nokia Corporation. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published by ++ * the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, write to the Free Software Foundation, Inc., 51 ++ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ * ++ * Authors: Artem Bityutskiy (Битюцкий Артём) ++ * Adrian Hunter ++ */ ++ ++#ifndef __UBIFS_DEBUG_H__ ++#define __UBIFS_DEBUG_H__ ++ ++#ifdef CONFIG_UBIFS_FS_DEBUG ++ ++/** ++ * ubifs_debug_info - per-FS debugging information. 
++ * @buf: a buffer of LEB size, used for various purposes ++ * @old_zroot: old index root - used by 'dbg_check_old_index()' ++ * @old_zroot_level: old index root level - used by 'dbg_check_old_index()' ++ * @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()' ++ * @failure_mode: failure mode for recovery testing ++ * @fail_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls ++ * @fail_timeout: time in jiffies when delay of failure mode expires ++ * @fail_cnt: current number of calls to failure mode I/O functions ++ * @fail_cnt_max: number of calls by which to delay failure mode ++ * @chk_lpt_sz: used by LPT tree size checker ++ * @chk_lpt_sz2: used by LPT tree size checker ++ * @chk_lpt_wastage: used by LPT tree size checker ++ * @chk_lpt_lebs: used by LPT tree size checker ++ * @new_nhead_offs: used by LPT tree size checker ++ * @new_ihead_lnum: used by debugging to check @c->ihead_lnum ++ * @new_ihead_offs: used by debugging to check @c->ihead_offs ++ * ++ * @saved_lst: saved lprops statistics (used by 'dbg_save_space_info()') ++ * @saved_free: saved free space (used by 'dbg_save_space_info()') ++ * ++ * @dfs_dir_name: name of debugfs directory containing this file-system's files ++ * @dfs_dir: direntry object of the file-system debugfs directory ++ * @dfs_dump_lprops: "dump lprops" debugfs knob ++ * @dfs_dump_budg: "dump budgeting information" debugfs knob ++ * @dfs_dump_tnc: "dump TNC" debugfs knob ++ */ ++struct ubifs_debug_info { ++ void *buf; ++ struct ubifs_zbranch old_zroot; ++ int old_zroot_level; ++ unsigned long long old_zroot_sqnum; ++ int failure_mode; ++ int fail_delay; ++ unsigned long fail_timeout; ++ unsigned int fail_cnt; ++ unsigned int fail_cnt_max; ++ long long chk_lpt_sz; ++ long long chk_lpt_sz2; ++ long long chk_lpt_wastage; ++ int chk_lpt_lebs; ++ int new_nhead_offs; ++ int new_ihead_lnum; ++ int new_ihead_offs; ++ ++ struct ubifs_lp_stats saved_lst; ++ long long saved_free; ++ ++ char dfs_dir_name[100]; ++ 
struct dentry *dfs_dir; ++ struct dentry *dfs_dump_lprops; ++ struct dentry *dfs_dump_budg; ++ struct dentry *dfs_dump_tnc; ++}; ++ ++#define ubifs_assert(expr) do { \ ++ if (unlikely(!(expr))) { \ ++ printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \ ++ __func__, __LINE__, current->pid); \ ++ dbg_dump_stack(); \ ++ } \ ++} while (0) ++ ++#define ubifs_assert_cmt_locked(c) do { \ ++ if (unlikely(down_write_trylock(&(c)->commit_sem))) { \ ++ up_write(&(c)->commit_sem); \ ++ printk(KERN_CRIT "commit lock is not locked!\n"); \ ++ ubifs_assert(0); \ ++ } \ ++} while (0) ++ ++#define dbg_dump_stack() do { \ ++ if (!dbg_failure_mode) \ ++ dump_stack(); \ ++} while (0) ++ ++/* Generic debugging messages */ ++#define dbg_msg(fmt, ...) do { \ ++ spin_lock(&dbg_lock); \ ++ printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", current->pid, \ ++ __func__, ##__VA_ARGS__); \ ++ spin_unlock(&dbg_lock); \ ++} while (0) ++ ++#define dbg_do_msg(typ, fmt, ...) do { \ ++ if (ubifs_msg_flags & typ) \ ++ dbg_msg(fmt, ##__VA_ARGS__); \ ++} while (0) ++ ++#define dbg_err(fmt, ...) do { \ ++ spin_lock(&dbg_lock); \ ++ ubifs_err(fmt, ##__VA_ARGS__); \ ++ spin_unlock(&dbg_lock); \ ++} while (0) ++ ++const char *dbg_key_str0(const struct ubifs_info *c, ++ const union ubifs_key *key); ++const char *dbg_key_str1(const struct ubifs_info *c, ++ const union ubifs_key *key); ++ ++/* ++ * DBGKEY macros require @dbg_lock to be held, which it is in the dbg message ++ * macros. ++ */ ++#define DBGKEY(key) dbg_key_str0(c, (key)) ++#define DBGKEY1(key) dbg_key_str1(c, (key)) ++ ++/* General messages */ ++#define dbg_gen(fmt, ...) dbg_do_msg(UBIFS_MSG_GEN, fmt, ##__VA_ARGS__) ++ ++/* Additional journal messages */ ++#define dbg_jnl(fmt, ...) dbg_do_msg(UBIFS_MSG_JNL, fmt, ##__VA_ARGS__) ++ ++/* Additional TNC messages */ ++#define dbg_tnc(fmt, ...) dbg_do_msg(UBIFS_MSG_TNC, fmt, ##__VA_ARGS__) ++ ++/* Additional lprops messages */ ++#define dbg_lp(fmt, ...) 
dbg_do_msg(UBIFS_MSG_LP, fmt, ##__VA_ARGS__) ++ ++/* Additional LEB find messages */ ++#define dbg_find(fmt, ...) dbg_do_msg(UBIFS_MSG_FIND, fmt, ##__VA_ARGS__) ++ ++/* Additional mount messages */ ++#define dbg_mnt(fmt, ...) dbg_do_msg(UBIFS_MSG_MNT, fmt, ##__VA_ARGS__) ++ ++/* Additional I/O messages */ ++#define dbg_io(fmt, ...) dbg_do_msg(UBIFS_MSG_IO, fmt, ##__VA_ARGS__) ++ ++/* Additional commit messages */ ++#define dbg_cmt(fmt, ...) dbg_do_msg(UBIFS_MSG_CMT, fmt, ##__VA_ARGS__) ++ ++/* Additional budgeting messages */ ++#define dbg_budg(fmt, ...) dbg_do_msg(UBIFS_MSG_BUDG, fmt, ##__VA_ARGS__) ++ ++/* Additional log messages */ ++#define dbg_log(fmt, ...) dbg_do_msg(UBIFS_MSG_LOG, fmt, ##__VA_ARGS__) ++ ++/* Additional gc messages */ ++#define dbg_gc(fmt, ...) dbg_do_msg(UBIFS_MSG_GC, fmt, ##__VA_ARGS__) ++ ++/* Additional scan messages */ ++#define dbg_scan(fmt, ...) dbg_do_msg(UBIFS_MSG_SCAN, fmt, ##__VA_ARGS__) ++ ++/* Additional recovery messages */ ++#define dbg_rcvry(fmt, ...) dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__) ++ ++/* ++ * Debugging message type flags (must match msg_type_names in debug.c). 
++ * ++ * UBIFS_MSG_GEN: general messages ++ * UBIFS_MSG_JNL: journal messages ++ * UBIFS_MSG_MNT: mount messages ++ * UBIFS_MSG_CMT: commit messages ++ * UBIFS_MSG_FIND: LEB find messages ++ * UBIFS_MSG_BUDG: budgeting messages ++ * UBIFS_MSG_GC: garbage collection messages ++ * UBIFS_MSG_TNC: TNC messages ++ * UBIFS_MSG_LP: lprops messages ++ * UBIFS_MSG_IO: I/O messages ++ * UBIFS_MSG_LOG: log messages ++ * UBIFS_MSG_SCAN: scan messages ++ * UBIFS_MSG_RCVRY: recovery messages ++ */ ++enum { ++ UBIFS_MSG_GEN = 0x1, ++ UBIFS_MSG_JNL = 0x2, ++ UBIFS_MSG_MNT = 0x4, ++ UBIFS_MSG_CMT = 0x8, ++ UBIFS_MSG_FIND = 0x10, ++ UBIFS_MSG_BUDG = 0x20, ++ UBIFS_MSG_GC = 0x40, ++ UBIFS_MSG_TNC = 0x80, ++ UBIFS_MSG_LP = 0x100, ++ UBIFS_MSG_IO = 0x200, ++ UBIFS_MSG_LOG = 0x400, ++ UBIFS_MSG_SCAN = 0x800, ++ UBIFS_MSG_RCVRY = 0x1000, ++}; ++ ++/* Debugging message type flags for each default debug message level */ ++#define UBIFS_MSG_LVL_0 0 ++#define UBIFS_MSG_LVL_1 0x1 ++#define UBIFS_MSG_LVL_2 0x7f ++#define UBIFS_MSG_LVL_3 0xffff ++ ++/* ++ * Debugging check flags (must match chk_names in debug.c). ++ * ++ * UBIFS_CHK_GEN: general checks ++ * UBIFS_CHK_TNC: check TNC ++ * UBIFS_CHK_IDX_SZ: check index size ++ * UBIFS_CHK_ORPH: check orphans ++ * UBIFS_CHK_OLD_IDX: check the old index ++ * UBIFS_CHK_LPROPS: check lprops ++ * UBIFS_CHK_FS: check the file-system ++ */ ++enum { ++ UBIFS_CHK_GEN = 0x1, ++ UBIFS_CHK_TNC = 0x2, ++ UBIFS_CHK_IDX_SZ = 0x4, ++ UBIFS_CHK_ORPH = 0x8, ++ UBIFS_CHK_OLD_IDX = 0x10, ++ UBIFS_CHK_LPROPS = 0x20, ++ UBIFS_CHK_FS = 0x40, ++}; ++ ++/* ++ * Special testing flags (must match tst_names in debug.c). 
++ * ++ * UBIFS_TST_FORCE_IN_THE_GAPS: force the use of in-the-gaps method ++ * UBIFS_TST_RCVRY: failure mode for recovery testing ++ */ ++enum { ++ UBIFS_TST_FORCE_IN_THE_GAPS = 0x2, ++ UBIFS_TST_RCVRY = 0x4, ++}; ++ ++#if CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 1 ++#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_1 ++#elif CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 2 ++#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_2 ++#elif CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 3 ++#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_3 ++#else ++#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_0 ++#endif ++ ++#ifdef CONFIG_UBIFS_FS_DEBUG_CHKS ++#define UBIFS_CHK_FLAGS_DEFAULT 0xffffffff ++#else ++#define UBIFS_CHK_FLAGS_DEFAULT 0 ++#endif ++ ++extern spinlock_t dbg_lock; ++ ++extern unsigned int ubifs_msg_flags; ++extern unsigned int ubifs_chk_flags; ++extern unsigned int ubifs_tst_flags; ++ ++int ubifs_debugging_init(struct ubifs_info *c); ++void ubifs_debugging_exit(struct ubifs_info *c); ++ ++/* Dump functions */ ++const char *dbg_ntype(int type); ++const char *dbg_cstate(int cmt_state); ++const char *dbg_get_key_dump(const struct ubifs_info *c, ++ const union ubifs_key *key); ++void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode); ++void dbg_dump_node(const struct ubifs_info *c, const void *node); ++void dbg_dump_lpt_node(const struct ubifs_info *c, void *node, int lnum, ++ int offs); ++void dbg_dump_budget_req(const struct ubifs_budget_req *req); ++void dbg_dump_lstats(const struct ubifs_lp_stats *lst); ++void dbg_dump_budg(struct ubifs_info *c); ++void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp); ++void dbg_dump_lprops(struct ubifs_info *c); ++void dbg_dump_lpt_info(struct ubifs_info *c); ++void dbg_dump_leb(const struct ubifs_info *c, int lnum); ++void dbg_dump_znode(const struct ubifs_info *c, ++ const struct ubifs_znode *znode); ++void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat); ++void dbg_dump_pnode(struct ubifs_info *c, 
struct ubifs_pnode *pnode, ++ struct ubifs_nnode *parent, int iip); ++void dbg_dump_tnc(struct ubifs_info *c); ++void dbg_dump_index(struct ubifs_info *c); ++void dbg_dump_lpt_lebs(const struct ubifs_info *c); ++ ++/* Checking helper functions */ ++typedef int (*dbg_leaf_callback)(struct ubifs_info *c, ++ struct ubifs_zbranch *zbr, void *priv); ++typedef int (*dbg_znode_callback)(struct ubifs_info *c, ++ struct ubifs_znode *znode, void *priv); ++int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb, ++ dbg_znode_callback znode_cb, void *priv); ++ ++/* Checking functions */ ++void dbg_save_space_info(struct ubifs_info *c); ++int dbg_check_space_info(struct ubifs_info *c); ++int dbg_check_lprops(struct ubifs_info *c); ++int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot); ++int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot); ++int dbg_check_cats(struct ubifs_info *c); ++int dbg_check_ltab(struct ubifs_info *c); ++int dbg_chk_lpt_free_spc(struct ubifs_info *c); ++int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len); ++int dbg_check_synced_i_size(struct inode *inode); ++int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir); ++int dbg_check_tnc(struct ubifs_info *c, int extra); ++int dbg_check_idx_size(struct ubifs_info *c, long long idx_size); ++int dbg_check_filesystem(struct ubifs_info *c); ++void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat, ++ int add_pos); ++int dbg_check_lprops(struct ubifs_info *c); ++int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode, ++ int row, int col); ++ ++/* Force the use of in-the-gaps method for testing */ ++ ++#define dbg_force_in_the_gaps_enabled \ ++ (ubifs_tst_flags & UBIFS_TST_FORCE_IN_THE_GAPS) ++ ++int dbg_force_in_the_gaps(void); ++ ++/* Failure mode for recovery testing */ ++ ++#define dbg_failure_mode (ubifs_tst_flags & UBIFS_TST_RCVRY) ++ ++#ifndef UBIFS_DBG_PRESERVE_UBI ++ ++#define 
ubi_leb_read dbg_leb_read ++#define ubi_leb_write dbg_leb_write ++#define ubi_leb_change dbg_leb_change ++#define ubi_leb_erase dbg_leb_erase ++#define ubi_leb_unmap dbg_leb_unmap ++#define ubi_is_mapped dbg_is_mapped ++#define ubi_leb_map dbg_leb_map ++ ++#endif ++ ++int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, ++ int len, int check); ++int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf, ++ int offset, int len, int dtype); ++int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf, ++ int len, int dtype); ++int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum); ++int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum); ++int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum); ++int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype); ++ ++static inline int dbg_read(struct ubi_volume_desc *desc, int lnum, char *buf, ++ int offset, int len) ++{ ++ return dbg_leb_read(desc, lnum, buf, offset, len, 0); ++} ++ ++static inline int dbg_write(struct ubi_volume_desc *desc, int lnum, ++ const void *buf, int offset, int len) ++{ ++ return dbg_leb_write(desc, lnum, buf, offset, len, UBI_UNKNOWN); ++} ++ ++static inline int dbg_change(struct ubi_volume_desc *desc, int lnum, ++ const void *buf, int len) ++{ ++ return dbg_leb_change(desc, lnum, buf, len, UBI_UNKNOWN); ++} ++ ++/* Debugfs-related stuff */ ++int dbg_debugfs_init(void); ++void dbg_debugfs_exit(void); ++int dbg_debugfs_init_fs(struct ubifs_info *c); ++void dbg_debugfs_exit_fs(struct ubifs_info *c); ++ ++#else /* !CONFIG_UBIFS_FS_DEBUG */ ++ ++/* Use "if (0)" to make compiler check arguments even if debugging is off */ ++#define ubifs_assert(expr) do { \ ++ if (0 && (expr)) \ ++ printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \ ++ __func__, __LINE__, current->pid); \ ++} while (0) ++ ++#define dbg_err(fmt, ...) 
do { \ ++ if (0) \ ++ ubifs_err(fmt, ##__VA_ARGS__); \ ++} while (0) ++ ++#define dbg_msg(fmt, ...) do { \ ++ if (0) \ ++ printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", \ ++ current->pid, __func__, ##__VA_ARGS__); \ ++} while (0) ++ ++#define dbg_dump_stack() ++#define ubifs_assert_cmt_locked(c) ++ ++#define dbg_gen(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_jnl(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_tnc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_lp(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_find(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_mnt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_io(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_cmt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_budg(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_log(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_gc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_scan(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_rcvry(fmt, ...) 
dbg_msg(fmt, ##__VA_ARGS__) ++ ++#define DBGKEY(key) ((char *)(key)) ++#define DBGKEY1(key) ((char *)(key)) ++ ++#define ubifs_debugging_init(c) 0 ++#define ubifs_debugging_exit(c) ({}) ++ ++#define dbg_ntype(type) "" ++#define dbg_cstate(cmt_state) "" ++#define dbg_get_key_dump(c, key) ({}) ++#define dbg_dump_inode(c, inode) ({}) ++#define dbg_dump_node(c, node) ({}) ++#define dbg_dump_lpt_node(c, node, lnum, offs) ({}) ++#define dbg_dump_budget_req(req) ({}) ++#define dbg_dump_lstats(lst) ({}) ++#define dbg_dump_budg(c) ({}) ++#define dbg_dump_lprop(c, lp) ({}) ++#define dbg_dump_lprops(c) ({}) ++#define dbg_dump_lpt_info(c) ({}) ++#define dbg_dump_leb(c, lnum) ({}) ++#define dbg_dump_znode(c, znode) ({}) ++#define dbg_dump_heap(c, heap, cat) ({}) ++#define dbg_dump_pnode(c, pnode, parent, iip) ({}) ++#define dbg_dump_tnc(c) ({}) ++#define dbg_dump_index(c) ({}) ++#define dbg_dump_lpt_lebs(c) ({}) ++ ++#define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0 ++#define dbg_old_index_check_init(c, zroot) 0 ++#define dbg_save_space_info(c) ({}) ++#define dbg_check_space_info(c) 0 ++#define dbg_check_old_index(c, zroot) 0 ++#define dbg_check_cats(c) 0 ++#define dbg_check_ltab(c) 0 ++#define dbg_chk_lpt_free_spc(c) 0 ++#define dbg_chk_lpt_sz(c, action, len) 0 ++#define dbg_check_synced_i_size(inode) 0 ++#define dbg_check_dir_size(c, dir) 0 ++#define dbg_check_tnc(c, x) 0 ++#define dbg_check_idx_size(c, idx_size) 0 ++#define dbg_check_filesystem(c) 0 ++#define dbg_check_heap(c, heap, cat, add_pos) ({}) ++#define dbg_check_lprops(c) 0 ++#define dbg_check_lpt_nodes(c, cnode, row, col) 0 ++#define dbg_force_in_the_gaps_enabled 0 ++#define dbg_force_in_the_gaps() 0 ++#define dbg_failure_mode 0 ++ ++#define dbg_debugfs_init() 0 ++#define dbg_debugfs_exit() ++#define dbg_debugfs_init_fs(c) 0 ++#define dbg_debugfs_exit_fs(c) 0 ++ ++#endif /* !CONFIG_UBIFS_FS_DEBUG */ ++ ++/* ++ * Some compatibility stuff goes here. 
++ */ ++ ++#include <asm/div64.h> ++ ++static inline uint64_t div_u64(uint64_t dividend, uint64_t divisor) ++{ ++ do_div(dividend, divisor); ++ return dividend; ++} ++ ++static inline void ___list_splice(const struct list_head *list, ++ struct list_head *prev, ++ struct list_head *next) ++{ ++ struct list_head *first = list->next; ++ struct list_head *last = list->prev; ++ ++ first->prev = prev; ++ prev->next = first; ++ ++ last->next = next; ++ next->prev = last; ++} ++ ++/* The below has been back-ported from 2.6.28 */ ++static inline void list_splice_tail(struct list_head *list, ++ struct list_head *head) ++{ ++ if (!list_empty(list)) ++ ___list_splice(list, head->prev, head); ++} ++#endif /* !__UBIFS_DEBUG_H__ */ +diff -Nurd linux-2.6.24/fs/ubifs/dir.c ubifs-v2.6.24/fs/ubifs/dir.c +--- linux-2.6.24/fs/ubifs/dir.c 1970-01-01 02:00:00.000000000 +0200 ++++ ubifs-v2.6.24/fs/ubifs/dir.c 2009-04-07 17:14:47.000000000 +0200 +@@ -0,0 +1,1215 @@ ++/* * This file is part of UBIFS. ++ * ++ * Copyright (C) 2006-2008 Nokia Corporation. ++ * Copyright (C) 2006, 2007 University of Szeged, Hungary ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published by ++ * the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, write to the Free Software Foundation, Inc., 51 ++ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ * ++ * Authors: Artem Bityutskiy (Битюцкий Артём) ++ * Adrian Hunter ++ * Zoltan Sogor ++ */ ++ ++/* ++ * This file implements directory operations. 
++ * ++ * All FS operations in this file allocate budget before writing anything to the ++ * media. If they fail to allocate it, the error is returned. The only ++ * exceptions are 'ubifs_unlink()' and 'ubifs_rmdir()' which keep working even ++ * if they unable to allocate the budget, because deletion %-ENOSPC failure is ++ * not what users are usually ready to get. UBIFS budgeting subsystem has some ++ * space reserved for these purposes. ++ * ++ * All operations in this file write all inodes which they change straight ++ * away, instead of marking them dirty. For example, 'ubifs_link()' changes ++ * @i_size of the parent inode and writes the parent inode together with the ++ * target inode. This was done to simplify file-system recovery which would ++ * otherwise be very difficult to do. The only exception is rename which marks ++ * the re-named inode dirty (because its @i_ctime is updated) but does not ++ * write it, but just marks it as dirty. ++ */ ++ ++#include "ubifs.h" ++ ++/** ++ * inherit_flags - inherit flags of the parent inode. ++ * @dir: parent inode ++ * @mode: new inode mode flags ++ * ++ * This is a helper function for 'ubifs_new_inode()' which inherits flag of the ++ * parent directory inode @dir. UBIFS inodes inherit the following flags: ++ * o %UBIFS_COMPR_FL, which is useful to switch compression on/of on ++ * sub-directory basis; ++ * o %UBIFS_SYNC_FL - useful for the same reasons; ++ * o %UBIFS_DIRSYNC_FL - similar, but relevant only to directories. ++ * ++ * This function returns the inherited flags. ++ */ ++static int inherit_flags(const struct inode *dir, int mode) ++{ ++ int flags; ++ const struct ubifs_inode *ui = ubifs_inode(dir); ++ ++ if (!S_ISDIR(dir->i_mode)) ++ /* ++ * The parent is not a directory, which means that an extended ++ * attribute inode is being created. No flags. 
++ */ ++ return 0; ++ ++ flags = ui->flags & (UBIFS_COMPR_FL | UBIFS_SYNC_FL | UBIFS_DIRSYNC_FL); ++ if (!S_ISDIR(mode)) ++ /* The "DIRSYNC" flag only applies to directories */ ++ flags &= ~UBIFS_DIRSYNC_FL; ++ return flags; ++} ++ ++/** ++ * ubifs_new_inode - allocate new UBIFS inode object. ++ * @c: UBIFS file-system description object ++ * @dir: parent directory inode ++ * @mode: inode mode flags ++ * ++ * This function finds an unused inode number, allocates new inode and ++ * initializes it. Returns new inode in case of success and an error code in ++ * case of failure. ++ */ ++struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir, ++ int mode) ++{ ++ struct inode *inode; ++ struct ubifs_inode *ui; ++ ++ inode = new_inode(c->vfs_sb); ++ ui = ubifs_inode(inode); ++ if (!inode) ++ return ERR_PTR(-ENOMEM); ++ ++ /* ++ * Set 'S_NOCMTIME' to prevent VFS form updating [mc]time of inodes and ++ * marking them dirty in file write path (see 'file_update_time()'). ++ * UBIFS has to fully control "clean <-> dirty" transitions of inodes ++ * to make budgeting work. 
++ */ ++ inode->i_flags |= (S_NOCMTIME); ++ ++ inode->i_uid = current->fsuid; ++ if (dir->i_mode & S_ISGID) { ++ inode->i_gid = dir->i_gid; ++ if (S_ISDIR(mode)) ++ mode |= S_ISGID; ++ } else ++ inode->i_gid = current->fsgid; ++ inode->i_mode = mode; ++ inode->i_mtime = inode->i_atime = inode->i_ctime = ++ ubifs_current_time(inode); ++ inode->i_mapping->nrpages = 0; ++ /* Disable readahead */ ++ inode->i_mapping->backing_dev_info = &c->bdi; ++ ++ switch (mode & S_IFMT) { ++ case S_IFREG: ++ inode->i_mapping->a_ops = &ubifs_file_address_operations; ++ inode->i_op = &ubifs_file_inode_operations; ++ inode->i_fop = &ubifs_file_operations; ++ break; ++ case S_IFDIR: ++ inode->i_op = &ubifs_dir_inode_operations; ++ inode->i_fop = &ubifs_dir_operations; ++ inode->i_size = ui->ui_size = UBIFS_INO_NODE_SZ; ++ break; ++ case S_IFLNK: ++ inode->i_op = &ubifs_symlink_inode_operations; ++ break; ++ case S_IFSOCK: ++ case S_IFIFO: ++ case S_IFBLK: ++ case S_IFCHR: ++ inode->i_op = &ubifs_file_inode_operations; ++ break; ++ default: ++ BUG(); ++ } ++ ++ ui->flags = inherit_flags(dir, mode); ++ ubifs_set_inode_flags(inode); ++ if (S_ISREG(mode)) ++ ui->compr_type = c->default_compr; ++ else ++ ui->compr_type = UBIFS_COMPR_NONE; ++ ui->synced_i_size = 0; ++ ++ spin_lock(&c->cnt_lock); ++ /* Inode number overflow is currently not supported */ ++ if (c->highest_inum >= INUM_WARN_WATERMARK) { ++ if (c->highest_inum >= INUM_WATERMARK) { ++ spin_unlock(&c->cnt_lock); ++ ubifs_err("out of inode numbers"); ++ make_bad_inode(inode); ++ iput(inode); ++ return ERR_PTR(-EINVAL); ++ } ++ ubifs_warn("running out of inode numbers (current %lu, max %d)", ++ (unsigned long)c->highest_inum, INUM_WATERMARK); ++ } ++ ++ inode->i_ino = ++c->highest_inum; ++ /* ++ * The creation sequence number remains with this inode for its ++ * lifetime. 
All nodes for this inode have a greater sequence number, ++ * and so it is possible to distinguish obsolete nodes belonging to a ++ * previous incarnation of the same inode number - for example, for the ++ * purpose of rebuilding the index. ++ */ ++ ui->creat_sqnum = ++c->max_sqnum; ++ spin_unlock(&c->cnt_lock); ++ return inode; ++} ++ ++#ifdef CONFIG_UBIFS_FS_DEBUG ++ ++static int dbg_check_name(struct ubifs_dent_node *dent, struct qstr *nm) ++{ ++ if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) ++ return 0; ++ if (le16_to_cpu(dent->nlen) != nm->len) ++ return -EINVAL; ++ if (memcmp(dent->name, nm->name, nm->len)) ++ return -EINVAL; ++ return 0; ++} ++ ++#else ++ ++#define dbg_check_name(dent, nm) 0 ++ ++#endif ++ ++static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, ++ struct nameidata *nd) ++{ ++ int err; ++ union ubifs_key key; ++ struct inode *inode = NULL; ++ struct ubifs_dent_node *dent; ++ struct ubifs_info *c = dir->i_sb->s_fs_info; ++ ++ dbg_gen("'%.*s' in dir ino %lu", ++ dentry->d_name.len, dentry->d_name.name, dir->i_ino); ++ ++ if (dentry->d_name.len > UBIFS_MAX_NLEN) ++ return ERR_PTR(-ENAMETOOLONG); ++ ++ dent = kmalloc(UBIFS_MAX_DENT_NODE_SZ, GFP_NOFS); ++ if (!dent) ++ return ERR_PTR(-ENOMEM); ++ ++ dent_key_init(c, &key, dir->i_ino, &dentry->d_name); ++ ++ err = ubifs_tnc_lookup_nm(c, &key, dent, &dentry->d_name); ++ if (err) { ++ /* ++ * Do not hash the direntry if parent 'i_nlink' is zero, because ++ * this has side-effects - '->delete_inode()' call will not be ++ * called for the parent orphan inode, because 'd_count' of its ++ * direntry will stay 1 (it'll be negative direntry I guess) ++ * and prevent 'iput_final()' until the dentry is destroyed due ++ * to unmount or memory pressure. 
++ */ ++ if (err == -ENOENT && dir->i_nlink != 0) { ++ dbg_gen("not found"); ++ goto done; ++ } ++ goto out; ++ } ++ ++ if (dbg_check_name(dent, &dentry->d_name)) { ++ err = -EINVAL; ++ goto out; ++ } ++ ++ inode = ubifs_iget(dir->i_sb, le64_to_cpu(dent->inum)); ++ if (IS_ERR(inode)) { ++ /* ++ * This should not happen. Probably the file-system needs ++ * checking. ++ */ ++ err = PTR_ERR(inode); ++ ubifs_err("dead directory entry '%.*s', error %d", ++ dentry->d_name.len, dentry->d_name.name, err); ++ ubifs_ro_mode(c, err); ++ goto out; ++ } ++ ++done: ++ kfree(dent); ++ return d_splice_alias(inode, dentry); ++ ++out: ++ kfree(dent); ++ return ERR_PTR(err); ++} ++ ++static int ubifs_create(struct inode *dir, struct dentry *dentry, int mode, ++ struct nameidata *nd) ++{ ++ struct inode *inode; ++ struct ubifs_info *c = dir->i_sb->s_fs_info; ++ int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len); ++ struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, ++ .dirtied_ino = 1 }; ++ struct ubifs_inode *dir_ui = ubifs_inode(dir); ++ ++ /* ++ * Budget request settings: new inode, new direntry, changing the ++ * parent directory inode. 
++ */ ++ ++ dbg_gen("dent '%.*s', mode %#x in dir ino %lu", ++ dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino); ++ ++ err = ubifs_budget_space(c, &req); ++ if (err) ++ return err; ++ ++ inode = ubifs_new_inode(c, dir, mode); ++ if (IS_ERR(inode)) { ++ err = PTR_ERR(inode); ++ goto out_budg; ++ } ++ ++ mutex_lock(&dir_ui->ui_mutex); ++ dir->i_size += sz_change; ++ dir_ui->ui_size = dir->i_size; ++ dir->i_mtime = dir->i_ctime = inode->i_ctime; ++ err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0); ++ if (err) ++ goto out_cancel; ++ mutex_unlock(&dir_ui->ui_mutex); ++ ++ ubifs_release_budget(c, &req); ++ insert_inode_hash(inode); ++ d_instantiate(dentry, inode); ++ return 0; ++ ++out_cancel: ++ dir->i_size -= sz_change; ++ dir_ui->ui_size = dir->i_size; ++ mutex_unlock(&dir_ui->ui_mutex); ++ make_bad_inode(inode); ++ iput(inode); ++out_budg: ++ ubifs_release_budget(c, &req); ++ ubifs_err("cannot create regular file, error %d", err); ++ return err; ++} ++ ++/** ++ * vfs_dent_type - get VFS directory entry type. ++ * @type: UBIFS directory entry type ++ * ++ * This function converts UBIFS directory entry type into VFS directory entry ++ * type. ++ */ ++static unsigned int vfs_dent_type(uint8_t type) ++{ ++ switch (type) { ++ case UBIFS_ITYPE_REG: ++ return DT_REG; ++ case UBIFS_ITYPE_DIR: ++ return DT_DIR; ++ case UBIFS_ITYPE_LNK: ++ return DT_LNK; ++ case UBIFS_ITYPE_BLK: ++ return DT_BLK; ++ case UBIFS_ITYPE_CHR: ++ return DT_CHR; ++ case UBIFS_ITYPE_FIFO: ++ return DT_FIFO; ++ case UBIFS_ITYPE_SOCK: ++ return DT_SOCK; ++ default: ++ BUG(); ++ } ++ return 0; ++} ++ ++/* ++ * The classical Unix view for directory is that it is a linear array of ++ * (name, inode number) entries. Linux/VFS assumes this model as well. ++ * Particularly, 'readdir()' call wants us to return a directory entry offset ++ * which later may be used to continue 'readdir()'ing the directory or to ++ * 'seek()' to that specific direntry. 
 * Obviously UBIFS does not really fit this
 * model because directory entries are identified by keys, which may collide.
 *
 * UBIFS uses directory entry hash value for directory offsets, so
 * 'seekdir()'/'telldir()' may not always work because of possible key
 * collisions. But UBIFS guarantees that consecutive 'readdir()' calls work
 * properly by means of saving full directory entry name in the private field
 * of the file description object.
 *
 * This means that UBIFS cannot support NFS which requires full
 * 'seekdir()'/'telldir()' support.
 *
 * Position handling in this function: 0 and 1 stand for "." and "..", 2 means
 * that the end of the directory was reached, and any other position is the
 * key hash of the directory entry saved in @file->private_data.
 */
static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir)
{
	int err, over = 0;
	struct qstr nm;
	union ubifs_key key;
	struct ubifs_dent_node *dent;
	struct inode *dir = file->f_path.dentry->d_inode;
	struct ubifs_info *c = dir->i_sb->s_fs_info;

	dbg_gen("dir ino %lu, f_pos %#llx", dir->i_ino, file->f_pos);

	if (file->f_pos > UBIFS_S_KEY_HASH_MASK || file->f_pos == 2)
		/*
		 * The directory was seek'ed to a senseless position or there
		 * are no more entries.
		 */
		return 0;

	/* File positions 0 and 1 correspond to "." and ".." */
	if (file->f_pos == 0) {
		ubifs_assert(!file->private_data);
		over = filldir(dirent, ".", 1, 0, dir->i_ino, DT_DIR);
		if (over)
			return 0;
		file->f_pos = 1;
	}

	if (file->f_pos == 1) {
		ubifs_assert(!file->private_data);
		over = filldir(dirent, "..", 2, 1,
			       parent_ino(file->f_path.dentry), DT_DIR);
		if (over)
			return 0;

		/* Find the first entry in TNC and save it */
		lowest_dent_key(c, &key, dir->i_ino);
		nm.name = NULL;
		dent = ubifs_tnc_next_ent(c, &key, &nm);
		if (IS_ERR(dent)) {
			err = PTR_ERR(dent);
			goto out;
		}

		file->f_pos = key_hash_flash(c, &dent->key);
		file->private_data = dent;
	}

	dent = file->private_data;
	if (!dent) {
		/*
		 * The directory was seek'ed to and is now readdir'ed.
		 * Find the entry corresponding to @file->f_pos or the
		 * closest one.
		 */
		dent_key_init_hash(c, &key, dir->i_ino, file->f_pos);
		nm.name = NULL;
		dent = ubifs_tnc_next_ent(c, &key, &nm);
		if (IS_ERR(dent)) {
			err = PTR_ERR(dent);
			goto out;
		}
		file->f_pos = key_hash_flash(c, &dent->key);
		file->private_data = dent;
	}

	/*
	 * Feed the saved entry to the caller, then advance to the next one.
	 * The loop is left when 'filldir()' reports that it cannot take more
	 * entries or when looking up the next entry fails.
	 */
	while (1) {
		dbg_gen("feed '%s', ino %llu, new f_pos %#x",
			dent->name, (unsigned long long)le64_to_cpu(dent->inum),
			key_hash_flash(c, &dent->key));
		ubifs_assert(le64_to_cpu(dent->ch.sqnum) >
			     ubifs_inode(dir)->creat_sqnum);

		nm.len = le16_to_cpu(dent->nlen);
		over = filldir(dirent, dent->name, nm.len, file->f_pos,
			       le64_to_cpu(dent->inum),
			       vfs_dent_type(dent->type));
		if (over)
			/* The entry stays saved and is fed again next time */
			return 0;

		/* Switch to the next entry */
		key_read(c, &dent->key, &key);
		nm.name = dent->name;
		dent = ubifs_tnc_next_ent(c, &key, &nm);
		if (IS_ERR(dent)) {
			err = PTR_ERR(dent);
			goto out;
		}

		/* Replace the saved entry with the one just found */
		kfree(file->private_data);
		file->f_pos = key_hash_flash(c, &dent->key);
		file->private_data = dent;
		cond_resched();
	}

out:
	if (err != -ENOENT) {
		ubifs_err("cannot find next direntry, error %d", err);
		return err;
	}

	/* %-ENOENT: no more entries - drop the saved state and mark the end */
	kfree(file->private_data);
	file->private_data = NULL;
	file->f_pos = 2;
	return 0;
}

/* If a directory is seeked, we have to free saved readdir() state */
static loff_t ubifs_dir_llseek(struct file *file, loff_t offset, int origin)
{
	kfree(file->private_data);
	file->private_data = NULL;
	return generic_file_llseek(file, offset, origin);
}

/* Free saved readdir() state when the directory is closed */
static int ubifs_dir_release(struct inode *dir, struct file *file)
{
	kfree(file->private_data);
	file->private_data = NULL;
	return 0;
}

/**
 * lock_2_inodes - a wrapper for locking two UBIFS inodes.
++ * @inode1: first inode ++ * @inode2: second inode ++ * ++ * We do not implement any tricks to guarantee strict lock ordering, because ++ * VFS has already done it for us on the @i_mutex. So this is just a simple ++ * wrapper function. ++ */ ++static void lock_2_inodes(struct inode *inode1, struct inode *inode2) ++{ ++ mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_1); ++ mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_2); ++} ++ ++/** ++ * unlock_2_inodes - a wrapper for unlocking two UBIFS inodes. ++ * @inode1: first inode ++ * @inode2: second inode ++ */ ++static void unlock_2_inodes(struct inode *inode1, struct inode *inode2) ++{ ++ mutex_unlock(&ubifs_inode(inode2)->ui_mutex); ++ mutex_unlock(&ubifs_inode(inode1)->ui_mutex); ++} ++ ++static int ubifs_link(struct dentry *old_dentry, struct inode *dir, ++ struct dentry *dentry) ++{ ++ struct ubifs_info *c = dir->i_sb->s_fs_info; ++ struct inode *inode = old_dentry->d_inode; ++ struct ubifs_inode *ui = ubifs_inode(inode); ++ struct ubifs_inode *dir_ui = ubifs_inode(dir); ++ int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len); ++ struct ubifs_budget_req req = { .new_dent = 1, .dirtied_ino = 2, ++ .dirtied_ino_d = ALIGN(ui->data_len, 8) }; ++ ++ /* ++ * Budget request settings: new direntry, changing the target inode, ++ * changing the parent inode. 
++ */ ++ ++ dbg_gen("dent '%.*s' to ino %lu (nlink %d) in dir ino %lu", ++ dentry->d_name.len, dentry->d_name.name, inode->i_ino, ++ inode->i_nlink, dir->i_ino); ++ ubifs_assert(mutex_is_locked(&dir->i_mutex)); ++ ubifs_assert(mutex_is_locked(&inode->i_mutex)); ++ err = dbg_check_synced_i_size(inode); ++ if (err) ++ return err; ++ ++ err = ubifs_budget_space(c, &req); ++ if (err) ++ return err; ++ ++ lock_2_inodes(dir, inode); ++ inc_nlink(inode); ++ atomic_inc(&inode->i_count); ++ inode->i_ctime = ubifs_current_time(inode); ++ dir->i_size += sz_change; ++ dir_ui->ui_size = dir->i_size; ++ dir->i_mtime = dir->i_ctime = inode->i_ctime; ++ err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0); ++ if (err) ++ goto out_cancel; ++ unlock_2_inodes(dir, inode); ++ ++ ubifs_release_budget(c, &req); ++ d_instantiate(dentry, inode); ++ return 0; ++ ++out_cancel: ++ dir->i_size -= sz_change; ++ dir_ui->ui_size = dir->i_size; ++ drop_nlink(inode); ++ unlock_2_inodes(dir, inode); ++ ubifs_release_budget(c, &req); ++ iput(inode); ++ return err; ++} ++ ++static int ubifs_unlink(struct inode *dir, struct dentry *dentry) ++{ ++ struct ubifs_info *c = dir->i_sb->s_fs_info; ++ struct inode *inode = dentry->d_inode; ++ struct ubifs_inode *dir_ui = ubifs_inode(dir); ++ int sz_change = CALC_DENT_SIZE(dentry->d_name.len); ++ int err, budgeted = 1; ++ struct ubifs_budget_req req = { .mod_dent = 1, .dirtied_ino = 2 }; ++ ++ /* ++ * Budget request settings: deletion direntry, deletion inode (+1 for ++ * @dirtied_ino), changing the parent directory inode. If budgeting ++ * fails, go ahead anyway because we have extra space reserved for ++ * deletions. 
++ */ ++ ++ dbg_gen("dent '%.*s' from ino %lu (nlink %d) in dir ino %lu", ++ dentry->d_name.len, dentry->d_name.name, inode->i_ino, ++ inode->i_nlink, dir->i_ino); ++ ubifs_assert(mutex_is_locked(&dir->i_mutex)); ++ ubifs_assert(mutex_is_locked(&inode->i_mutex)); ++ err = dbg_check_synced_i_size(inode); ++ if (err) ++ return err; ++ ++ err = ubifs_budget_space(c, &req); ++ if (err) { ++ if (err != -ENOSPC) ++ return err; ++ budgeted = 0; ++ } ++ ++ lock_2_inodes(dir, inode); ++ inode->i_ctime = ubifs_current_time(dir); ++ drop_nlink(inode); ++ dir->i_size -= sz_change; ++ dir_ui->ui_size = dir->i_size; ++ dir->i_mtime = dir->i_ctime = inode->i_ctime; ++ err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 1, 0); ++ if (err) ++ goto out_cancel; ++ unlock_2_inodes(dir, inode); ++ ++ if (budgeted) ++ ubifs_release_budget(c, &req); ++ else { ++ /* We've deleted something - clean the "no space" flags */ ++ c->nospace = c->nospace_rp = 0; ++ smp_wmb(); ++ } ++ return 0; ++ ++out_cancel: ++ dir->i_size += sz_change; ++ dir_ui->ui_size = dir->i_size; ++ inc_nlink(inode); ++ unlock_2_inodes(dir, inode); ++ if (budgeted) ++ ubifs_release_budget(c, &req); ++ return err; ++} ++ ++/** ++ * check_dir_empty - check if a directory is empty or not. ++ * @c: UBIFS file-system description object ++ * @dir: VFS inode object of the directory to check ++ * ++ * This function checks if directory @dir is empty. Returns zero if the ++ * directory is empty, %-ENOTEMPTY if it is not, and other negative error codes ++ * in case of of errors. 
++ */ ++static int check_dir_empty(struct ubifs_info *c, struct inode *dir) ++{ ++ struct qstr nm = { .name = NULL }; ++ struct ubifs_dent_node *dent; ++ union ubifs_key key; ++ int err; ++ ++ lowest_dent_key(c, &key, dir->i_ino); ++ dent = ubifs_tnc_next_ent(c, &key, &nm); ++ if (IS_ERR(dent)) { ++ err = PTR_ERR(dent); ++ if (err == -ENOENT) ++ err = 0; ++ } else { ++ kfree(dent); ++ err = -ENOTEMPTY; ++ } ++ return err; ++} ++ ++static int ubifs_rmdir(struct inode *dir, struct dentry *dentry) ++{ ++ struct ubifs_info *c = dir->i_sb->s_fs_info; ++ struct inode *inode = dentry->d_inode; ++ int sz_change = CALC_DENT_SIZE(dentry->d_name.len); ++ int err, budgeted = 1; ++ struct ubifs_inode *dir_ui = ubifs_inode(dir); ++ struct ubifs_budget_req req = { .mod_dent = 1, .dirtied_ino = 2 }; ++ ++ /* ++ * Budget request settings: deletion direntry, deletion inode and ++ * changing the parent inode. If budgeting fails, go ahead anyway ++ * because we have extra space reserved for deletions. ++ */ ++ ++ dbg_gen("directory '%.*s', ino %lu in dir ino %lu", dentry->d_name.len, ++ dentry->d_name.name, inode->i_ino, dir->i_ino); ++ ubifs_assert(mutex_is_locked(&dir->i_mutex)); ++ ubifs_assert(mutex_is_locked(&inode->i_mutex)); ++ err = check_dir_empty(c, dentry->d_inode); ++ if (err) ++ return err; ++ ++ err = ubifs_budget_space(c, &req); ++ if (err) { ++ if (err != -ENOSPC) ++ return err; ++ budgeted = 0; ++ } ++ ++ lock_2_inodes(dir, inode); ++ inode->i_ctime = ubifs_current_time(dir); ++ clear_nlink(inode); ++ drop_nlink(dir); ++ dir->i_size -= sz_change; ++ dir_ui->ui_size = dir->i_size; ++ dir->i_mtime = dir->i_ctime = inode->i_ctime; ++ err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 1, 0); ++ if (err) ++ goto out_cancel; ++ unlock_2_inodes(dir, inode); ++ ++ if (budgeted) ++ ubifs_release_budget(c, &req); ++ else { ++ /* We've deleted something - clean the "no space" flags */ ++ c->nospace = c->nospace_rp = 0; ++ smp_wmb(); ++ } ++ return 0; ++ ++out_cancel: ++ 
dir->i_size += sz_change; ++ dir_ui->ui_size = dir->i_size; ++ inc_nlink(dir); ++ inc_nlink(inode); ++ inc_nlink(inode); ++ unlock_2_inodes(dir, inode); ++ if (budgeted) ++ ubifs_release_budget(c, &req); ++ return err; ++} ++ ++static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, int mode) ++{ ++ struct inode *inode; ++ struct ubifs_inode *dir_ui = ubifs_inode(dir); ++ struct ubifs_info *c = dir->i_sb->s_fs_info; ++ int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len); ++ struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1 }; ++ ++ /* ++ * Budget request settings: new inode, new direntry and changing parent ++ * directory inode. ++ */ ++ ++ dbg_gen("dent '%.*s', mode %#x in dir ino %lu", ++ dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino); ++ ++ err = ubifs_budget_space(c, &req); ++ if (err) ++ return err; ++ ++ inode = ubifs_new_inode(c, dir, S_IFDIR | mode); ++ if (IS_ERR(inode)) { ++ err = PTR_ERR(inode); ++ goto out_budg; ++ } ++ ++ mutex_lock(&dir_ui->ui_mutex); ++ insert_inode_hash(inode); ++ inc_nlink(inode); ++ inc_nlink(dir); ++ dir->i_size += sz_change; ++ dir_ui->ui_size = dir->i_size; ++ dir->i_mtime = dir->i_ctime = inode->i_ctime; ++ err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0); ++ if (err) { ++ ubifs_err("cannot create directory, error %d", err); ++ goto out_cancel; ++ } ++ mutex_unlock(&dir_ui->ui_mutex); ++ ++ ubifs_release_budget(c, &req); ++ d_instantiate(dentry, inode); ++ return 0; ++ ++out_cancel: ++ dir->i_size -= sz_change; ++ dir_ui->ui_size = dir->i_size; ++ drop_nlink(dir); ++ mutex_unlock(&dir_ui->ui_mutex); ++ make_bad_inode(inode); ++ iput(inode); ++out_budg: ++ ubifs_release_budget(c, &req); ++ return err; ++} ++ ++static int ubifs_mknod(struct inode *dir, struct dentry *dentry, ++ int mode, dev_t rdev) ++{ ++ struct inode *inode; ++ struct ubifs_inode *ui; ++ struct ubifs_inode *dir_ui = ubifs_inode(dir); ++ struct ubifs_info *c = dir->i_sb->s_fs_info; ++ union ubifs_dev_desc *dev = 
NULL; ++ int sz_change = CALC_DENT_SIZE(dentry->d_name.len); ++ int err, devlen = 0; ++ struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, ++ .new_ino_d = ALIGN(devlen, 8), ++ .dirtied_ino = 1 }; ++ ++ /* ++ * Budget request settings: new inode, new direntry and changing parent ++ * directory inode. ++ */ ++ ++ dbg_gen("dent '%.*s' in dir ino %lu", ++ dentry->d_name.len, dentry->d_name.name, dir->i_ino); ++ ++ if (!new_valid_dev(rdev)) ++ return -EINVAL; ++ ++ if (S_ISBLK(mode) || S_ISCHR(mode)) { ++ dev = kmalloc(sizeof(union ubifs_dev_desc), GFP_NOFS); ++ if (!dev) ++ return -ENOMEM; ++ devlen = ubifs_encode_dev(dev, rdev); ++ } ++ ++ err = ubifs_budget_space(c, &req); ++ if (err) { ++ kfree(dev); ++ return err; ++ } ++ ++ inode = ubifs_new_inode(c, dir, mode); ++ if (IS_ERR(inode)) { ++ kfree(dev); ++ err = PTR_ERR(inode); ++ goto out_budg; ++ } ++ ++ init_special_inode(inode, inode->i_mode, rdev); ++ inode->i_size = ubifs_inode(inode)->ui_size = devlen; ++ ui = ubifs_inode(inode); ++ ui->data = dev; ++ ui->data_len = devlen; ++ ++ mutex_lock(&dir_ui->ui_mutex); ++ dir->i_size += sz_change; ++ dir_ui->ui_size = dir->i_size; ++ dir->i_mtime = dir->i_ctime = inode->i_ctime; ++ err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0); ++ if (err) ++ goto out_cancel; ++ mutex_unlock(&dir_ui->ui_mutex); ++ ++ ubifs_release_budget(c, &req); ++ insert_inode_hash(inode); ++ d_instantiate(dentry, inode); ++ return 0; ++ ++out_cancel: ++ dir->i_size -= sz_change; ++ dir_ui->ui_size = dir->i_size; ++ mutex_unlock(&dir_ui->ui_mutex); ++ make_bad_inode(inode); ++ iput(inode); ++out_budg: ++ ubifs_release_budget(c, &req); ++ return err; ++} ++ ++static int ubifs_symlink(struct inode *dir, struct dentry *dentry, ++ const char *symname) ++{ ++ struct inode *inode; ++ struct ubifs_inode *ui; ++ struct ubifs_inode *dir_ui = ubifs_inode(dir); ++ struct ubifs_info *c = dir->i_sb->s_fs_info; ++ int err, len = strlen(symname); ++ int sz_change = 
CALC_DENT_SIZE(dentry->d_name.len); ++ struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, ++ .new_ino_d = ALIGN(len, 8), ++ .dirtied_ino = 1 }; ++ ++ /* ++ * Budget request settings: new inode, new direntry and changing parent ++ * directory inode. ++ */ ++ ++ dbg_gen("dent '%.*s', target '%s' in dir ino %lu", dentry->d_name.len, ++ dentry->d_name.name, symname, dir->i_ino); ++ ++ if (len > UBIFS_MAX_INO_DATA) ++ return -ENAMETOOLONG; ++ ++ err = ubifs_budget_space(c, &req); ++ if (err) ++ return err; ++ ++ inode = ubifs_new_inode(c, dir, S_IFLNK | S_IRWXUGO); ++ if (IS_ERR(inode)) { ++ err = PTR_ERR(inode); ++ goto out_budg; ++ } ++ ++ ui = ubifs_inode(inode); ++ ui->data = kmalloc(len + 1, GFP_NOFS); ++ if (!ui->data) { ++ err = -ENOMEM; ++ goto out_inode; ++ } ++ ++ memcpy(ui->data, symname, len); ++ ((char *)ui->data)[len] = '\0'; ++ /* ++ * The terminating zero byte is not written to the flash media and it ++ * is put just to make later in-memory string processing simpler. Thus, ++ * data length is @len, not @len + %1. ++ */ ++ ui->data_len = len; ++ inode->i_size = ubifs_inode(inode)->ui_size = len; ++ ++ mutex_lock(&dir_ui->ui_mutex); ++ dir->i_size += sz_change; ++ dir_ui->ui_size = dir->i_size; ++ dir->i_mtime = dir->i_ctime = inode->i_ctime; ++ err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0); ++ if (err) ++ goto out_cancel; ++ mutex_unlock(&dir_ui->ui_mutex); ++ ++ ubifs_release_budget(c, &req); ++ insert_inode_hash(inode); ++ d_instantiate(dentry, inode); ++ return 0; ++ ++out_cancel: ++ dir->i_size -= sz_change; ++ dir_ui->ui_size = dir->i_size; ++ mutex_unlock(&dir_ui->ui_mutex); ++out_inode: ++ make_bad_inode(inode); ++ iput(inode); ++out_budg: ++ ubifs_release_budget(c, &req); ++ return err; ++} ++ ++/** ++ * lock_3_inodes - a wrapper for locking three UBIFS inodes. 
++ * @inode1: first inode
++ * @inode2: second inode
++ * @inode3: third inode
++ *
++ * This function is used for 'ubifs_rename()' and @inode1 may be the same as
++ * @inode2 whereas @inode3 may be %NULL.
++ *
++ * We do not implement any tricks to guarantee strict lock ordering, because
++ * VFS has already done it for us on the @i_mutex. So this is just a simple
++ * wrapper function.
++ */
++static void lock_3_inodes(struct inode *inode1, struct inode *inode2,
++			  struct inode *inode3)
++{
++	mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_1);
++	/* Do not take the same mutex twice if both directories are the same */
++	if (inode2 != inode1)
++		mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_2);
++	if (inode3)
++		mutex_lock_nested(&ubifs_inode(inode3)->ui_mutex, WB_MUTEX_3);
++}
++
++/**
++ * unlock_3_inodes - a wrapper for unlocking three UBIFS inodes for rename.
++ * @inode1: first inode
++ * @inode2: second inode
++ * @inode3: third inode
++ *
++ * The mutexes are released in the reverse order of 'lock_3_inodes()', with
++ * the same checks for @inode3 being %NULL and @inode1 being equal to @inode2.
++ */
++static void unlock_3_inodes(struct inode *inode1, struct inode *inode2,
++			    struct inode *inode3)
++{
++	if (inode3)
++		mutex_unlock(&ubifs_inode(inode3)->ui_mutex);
++	if (inode1 != inode2)
++		mutex_unlock(&ubifs_inode(inode2)->ui_mutex);
++	mutex_unlock(&ubifs_inode(inode1)->ui_mutex);
++}
++
++/**
++ * ubifs_rename - rename a directory entry.
++ * @old_dir: directory the entry is moved from
++ * @old_dentry: dentry which is renamed
++ * @new_dir: directory the entry is moved to (may be the same as @old_dir)
++ * @new_dentry: target dentry; if it has an inode, that inode is unlinked
++ *
++ * Returns zero in case of success and a negative error code in case of
++ * failure. If 'ubifs_jnl_rename()' fails, all in-memory link count and size
++ * changes made by this function are reverted.
++ */
++static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
++			struct inode *new_dir, struct dentry *new_dentry)
++{
++	struct ubifs_info *c = old_dir->i_sb->s_fs_info;
++	struct inode *old_inode = old_dentry->d_inode;
++	struct inode *new_inode = new_dentry->d_inode;
++	struct ubifs_inode *old_inode_ui = ubifs_inode(old_inode);
++	int err, release, sync = 0, move = (new_dir != old_dir);
++	int is_dir = S_ISDIR(old_inode->i_mode);
++	/* Non-zero if the target name exists and its inode is replaced */
++	int unlink = !!new_inode;
++	int new_sz = CALC_DENT_SIZE(new_dentry->d_name.len);
++	int old_sz = CALC_DENT_SIZE(old_dentry->d_name.len);
++	struct ubifs_budget_req req = { .new_dent = 1, .mod_dent = 1,
++					.dirtied_ino = 3 };
++	struct ubifs_budget_req ino_req = { .dirtied_ino = 1,
++			.dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) };
++	struct timespec time;
++
++	/*
++	 * Budget request settings: deletion direntry, new direntry, removing
++	 * the old inode, and changing old and new parent directory inodes.
++	 *
++	 * However, this operation also marks the target inode as dirty and
++	 * does not write it, so we allocate budget for the target inode
++	 * separately.
++	 */
++
++	dbg_gen("dent '%.*s' ino %lu in dir ino %lu to dent '%.*s' in "
++		"dir ino %lu", old_dentry->d_name.len, old_dentry->d_name.name,
++		old_inode->i_ino, old_dir->i_ino, new_dentry->d_name.len,
++		new_dentry->d_name.name, new_dir->i_ino);
++	ubifs_assert(mutex_is_locked(&old_dir->i_mutex));
++	ubifs_assert(mutex_is_locked(&new_dir->i_mutex));
++	if (unlink)
++		ubifs_assert(mutex_is_locked(&new_inode->i_mutex));
++
++
++	/* A directory may replace only an empty directory */
++	if (unlink && is_dir) {
++		err = check_dir_empty(c, new_inode);
++		if (err)
++			return err;
++	}
++
++	err = ubifs_budget_space(c, &req);
++	if (err)
++		return err;
++	err = ubifs_budget_space(c, &ino_req);
++	if (err) {
++		ubifs_release_budget(c, &req);
++		return err;
++	}
++
++	lock_3_inodes(old_dir, new_dir, new_inode);
++
++	/*
++	 * Like most other Unix systems, set the @i_ctime for inodes on a
++	 * rename.
++	 */
++	time = ubifs_current_time(old_dir);
++	old_inode->i_ctime = time;
++
++	/* We must adjust parent link count when renaming directories */
++	if (is_dir) {
++		if (move) {
++			/*
++			 * @old_dir loses a link because we are moving
++			 * @old_inode to a different directory.
++			 */
++			drop_nlink(old_dir);
++			/*
++			 * @new_dir only gains a link if we are not also
++			 * overwriting an existing directory.
++			 */
++			if (!unlink)
++				inc_nlink(new_dir);
++		} else {
++			/*
++			 * @old_inode is not moving to a different directory,
++			 * but @old_dir still loses a link if we are
++			 * overwriting an existing directory.
++			 */
++			if (unlink)
++				drop_nlink(old_dir);
++		}
++	}
++
++	old_dir->i_size -= old_sz;
++	ubifs_inode(old_dir)->ui_size = old_dir->i_size;
++	old_dir->i_mtime = old_dir->i_ctime = time;
++	new_dir->i_mtime = new_dir->i_ctime = time;
++
++	/*
++	 * And finally, if we unlinked a direntry which happened to have the
++	 * same name as the moved direntry, we have to decrement @i_nlink of
++	 * the unlinked inode and change its ctime.
++	 */
++	if (unlink) {
++		/*
++		 * Directories cannot have hard-links, so if this is a
++		 * directory, decrement its @i_nlink twice because an empty
++		 * directory has @i_nlink 2.
++		 */
++		if (is_dir)
++			drop_nlink(new_inode);
++		new_inode->i_ctime = time;
++		drop_nlink(new_inode);
++	} else {
++		/* A new name is added, so @new_dir grows by one direntry */
++		new_dir->i_size += new_sz;
++		ubifs_inode(new_dir)->ui_size = new_dir->i_size;
++	}
++
++	/*
++	 * Do not ask 'ubifs_jnl_rename()' to flush write-buffer if @old_inode
++	 * is dirty, because this will be done later on at the end of
++	 * 'ubifs_rename()'.
++	 */
++	if (IS_SYNC(old_inode)) {
++		sync = IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir);
++		if (unlink && IS_SYNC(new_inode))
++			sync = 1;
++	}
++	err = ubifs_jnl_rename(c, old_dir, old_dentry, new_dir, new_dentry,
++			       sync);
++	if (err)
++		goto out_cancel;
++
++	unlock_3_inodes(old_dir, new_dir, new_inode);
++	ubifs_release_budget(c, &req);
++
++	/*
++	 * Mark @old_inode dirty. If it was dirty already, the separately
++	 * allocated @ino_req budget is not needed and is released below.
++	 */
++	mutex_lock(&old_inode_ui->ui_mutex);
++	release = old_inode_ui->dirty;
++	mark_inode_dirty_sync(old_inode);
++	mutex_unlock(&old_inode_ui->ui_mutex);
++
++	if (release)
++		ubifs_release_budget(c, &ino_req);
++	if (IS_SYNC(old_inode))
++		err = old_inode->i_sb->s_op->write_inode(old_inode, 1);
++	return err;
++
++out_cancel:
++	/* Revert the in-memory changes, mirroring the updates made above */
++	if (unlink) {
++		if (is_dir)
++			inc_nlink(new_inode);
++		inc_nlink(new_inode);
++	} else {
++		new_dir->i_size -= new_sz;
++		ubifs_inode(new_dir)->ui_size = new_dir->i_size;
++	}
++	old_dir->i_size += old_sz;
++	ubifs_inode(old_dir)->ui_size = old_dir->i_size;
++	if (is_dir) {
++		if (move) {
++			inc_nlink(old_dir);
++			if (!unlink)
++				drop_nlink(new_dir);
++		} else {
++			if (unlink)
++				inc_nlink(old_dir);
++		}
++	}
++	unlock_3_inodes(old_dir, new_dir, new_inode);
++	ubifs_release_budget(c, &ino_req);
++	ubifs_release_budget(c, &req);
++	return err;
++}
++
++/**
++ * ubifs_getattr - fill in attributes of an inode.
++ * @mnt: VFS mount (not used by this function)
++ * @dentry: dentry of the inode to report attributes for
++ * @stat: the structure to fill in
++ *
++ * The attributes are copied under @ui_mutex of the inode. This function
++ * always returns zero.
++ */
++int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry,
++		  struct kstat *stat)
++{
++	loff_t size;
++	struct inode *inode = dentry->d_inode;
++	struct ubifs_inode *ui = ubifs_inode(inode);
++
++	mutex_lock(&ui->ui_mutex);
++	stat->dev = inode->i_sb->s_dev;
++	stat->ino = inode->i_ino;
++	stat->mode = inode->i_mode;
++	stat->nlink = inode->i_nlink;
++	stat->uid = inode->i_uid;
++	stat->gid = inode->i_gid;
++	stat->rdev = inode->i_rdev;
++	stat->atime = inode->i_atime;
++	stat->mtime = inode->i_mtime;
++	stat->ctime = inode->i_ctime;
++	stat->blksize = UBIFS_BLOCK_SIZE;
++	/* Report the UBIFS-maintained size rather than @inode->i_size */
++	stat->size = ui->ui_size;
++
++	/*
++	 * Unfortunately, the 'stat()' system call was designed for block
++	 * device based file systems, and it is not appropriate for UBIFS,
++	 * because UBIFS does not have notion of "block". For example, it is
++	 * difficult to tell how many blocks a directory takes - it actually
++	 * takes less than 300 bytes, but we have to round it to block size,
++	 * which introduces a large mistake. This makes utilities like 'du'
++	 * report completely senseless numbers. This is the reason why UBIFS
++	 * goes the same way as JFFS2 - it reports zero blocks for everything
++	 * but regular files, which makes more sense than reporting completely
++	 * wrong sizes.
++	 */
++	if (S_ISREG(inode->i_mode)) {
++		size = ui->xattr_size;
++		size += stat->size;
++		size = ALIGN(size, UBIFS_BLOCK_SIZE);
++		/*
++		 * Note, user-space expects 512-byte blocks count irrespectively
++		 * of what was reported in @stat->size.
++		 */
++		stat->blocks = size >> 9;
++	} else
++		stat->blocks = 0;
++	mutex_unlock(&ui->ui_mutex);
++	return 0;
++}
++
++/* Inode operations of UBIFS directories */
++const struct inode_operations ubifs_dir_inode_operations = {
++	.lookup      = ubifs_lookup,
++	.create      = ubifs_create,
++	.link        = ubifs_link,
++	.symlink     = ubifs_symlink,
++	.unlink      = ubifs_unlink,
++	.mkdir       = ubifs_mkdir,
++	.rmdir       = ubifs_rmdir,
++	.mknod       = ubifs_mknod,
++	.rename      = ubifs_rename,
++	.setattr     = ubifs_setattr,
++	.getattr     = ubifs_getattr,
++#ifdef CONFIG_UBIFS_FS_XATTR
++	.setxattr    = ubifs_setxattr,
++	.getxattr    = ubifs_getxattr,
++	.listxattr   = ubifs_listxattr,
++	.removexattr = ubifs_removexattr,
++#endif
++};
++
++/* File operations of UBIFS directories */
++const struct file_operations ubifs_dir_operations = {
++	.llseek         = ubifs_dir_llseek,
++	.release        = ubifs_dir_release,
++	.read           = generic_read_dir,
++	.readdir        = ubifs_readdir,
++	.fsync          = ubifs_fsync,
++	.unlocked_ioctl = ubifs_ioctl,
++#ifdef CONFIG_COMPAT
++	.compat_ioctl   = ubifs_compat_ioctl,
++#endif
++};
+diff -Nurd linux-2.6.24/fs/ubifs/file.c ubifs-v2.6.24/fs/ubifs/file.c
+--- linux-2.6.24/fs/ubifs/file.c 1970-01-01 02:00:00.000000000 +0200
++++ ubifs-v2.6.24/fs/ubifs/file.c 2009-04-07 17:14:47.000000000 +0200
+@@ -0,0 +1,1593 @@
++/*
++ * This file is part of UBIFS.
++ *
++ * Copyright (C) 2006-2008 Nokia Corporation.
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 as published by
++ * the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
++ * more details.
++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, write to the Free Software Foundation, Inc., 51 ++ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ * ++ * Authors: Artem Bityutskiy (Битюцкий Артём) ++ * Adrian Hunter ++ */ ++ ++/* ++ * This file implements VFS file and inode operations of regular files, device ++ * nodes and symlinks as well as address space operations. ++ * ++ * UBIFS uses 2 page flags: PG_private and PG_checked. PG_private is set if the ++ * page is dirty and is used for budgeting purposes - dirty pages should not be ++ * budgeted. The PG_checked flag is set if full budgeting is required for the ++ * page e.g., when it corresponds to a file hole or it is just beyond the file ++ * size. The budgeting is done in 'ubifs_write_begin()', because it is OK to ++ * fail in this function, and the budget is released in 'ubifs_write_end()'. So ++ * the PG_private and PG_checked flags carry the information about how the page ++ * was budgeted, to make it possible to release the budget properly. ++ * ++ * A thing to keep in mind: inode's 'i_mutex' is locked in most VFS operations ++ * we implement. However, this is not true for '->writepage()', which might be ++ * called with 'i_mutex' unlocked. For example, when pdflush is performing ++ * write-back, it calls 'writepage()' with unlocked 'i_mutex', although the ++ * inode has 'I_LOCK' flag in this case. At "normal" work-paths 'i_mutex' is ++ * locked in '->writepage', e.g. in "sys_write -> alloc_pages -> direct reclaim ++ * path'. So, in '->writepage()' we are only guaranteed that the page is ++ * locked. ++ * ++ * Similarly, 'i_mutex' does not have to be locked in readpage(), e.g., ++ * readahead path does not have it locked ("sys_read -> generic_file_aio_read ++ * -> ondemand_readahead -> readpage"). In case of readahead, 'I_LOCK' flag is ++ * not set as well. However, UBIFS disables readahead. 
++ * ++ * This, for example means that there might be 2 concurrent '->writepage()' ++ * calls for the same inode, but different inode dirty pages. ++ */ ++ ++#include "ubifs.h" ++#include <linux/mount.h> ++ ++static int read_block(struct inode *inode, void *addr, unsigned int block, ++ struct ubifs_data_node *dn) ++{ ++ struct ubifs_info *c = inode->i_sb->s_fs_info; ++ int err, len, out_len; ++ union ubifs_key key; ++ unsigned int dlen; ++ ++ data_key_init(c, &key, inode->i_ino, block); ++ err = ubifs_tnc_lookup(c, &key, dn); ++ if (err) { ++ if (err == -ENOENT) ++ /* Not found, so it must be a hole */ ++ memset(addr, 0, UBIFS_BLOCK_SIZE); ++ return err; ++ } ++ ++ ubifs_assert(le64_to_cpu(dn->ch.sqnum) > ++ ubifs_inode(inode)->creat_sqnum); ++ len = le32_to_cpu(dn->size); ++ if (len <= 0 || len > UBIFS_BLOCK_SIZE) ++ goto dump; ++ ++ dlen = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ; ++ out_len = UBIFS_BLOCK_SIZE; ++ err = ubifs_decompress(&dn->data, dlen, addr, &out_len, ++ le16_to_cpu(dn->compr_type)); ++ if (err || len != out_len) ++ goto dump; ++ ++ /* ++ * Data length can be less than a full block, even for blocks that are ++ * not the last in the file (e.g., as a result of making a hole and ++ * appending data). Ensure that the remainder is zeroed out. 
++ */ ++ if (len < UBIFS_BLOCK_SIZE) ++ memset(addr + len, 0, UBIFS_BLOCK_SIZE - len); ++ ++ return 0; ++ ++dump: ++ ubifs_err("bad data node (block %u, inode %lu)", ++ block, inode->i_ino); ++ dbg_dump_node(c, dn); ++ return -EINVAL; ++} ++ ++static int do_readpage(struct page *page) ++{ ++ void *addr; ++ int err = 0, i; ++ unsigned int block, beyond; ++ struct ubifs_data_node *dn; ++ struct inode *inode = page->mapping->host; ++ loff_t i_size = i_size_read(inode); ++ ++ dbg_gen("ino %lu, pg %lu, i_size %lld, flags %#lx", ++ inode->i_ino, page->index, i_size, page->flags); ++ ubifs_assert(!PageChecked(page)); ++ ubifs_assert(!PagePrivate(page)); ++ ++ addr = kmap(page); ++ ++ block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT; ++ beyond = (i_size + UBIFS_BLOCK_SIZE - 1) >> UBIFS_BLOCK_SHIFT; ++ if (block >= beyond) { ++ /* Reading beyond inode */ ++ SetPageChecked(page); ++ memset(addr, 0, PAGE_CACHE_SIZE); ++ goto out; ++ } ++ ++ dn = kmalloc(UBIFS_MAX_DATA_NODE_SZ, GFP_NOFS); ++ if (!dn) { ++ err = -ENOMEM; ++ goto error; ++ } ++ ++ i = 0; ++ while (1) { ++ int ret; ++ ++ if (block >= beyond) { ++ /* Reading beyond inode */ ++ err = -ENOENT; ++ memset(addr, 0, UBIFS_BLOCK_SIZE); ++ } else { ++ ret = read_block(inode, addr, block, dn); ++ if (ret) { ++ err = ret; ++ if (err != -ENOENT) ++ break; ++ } else if (block + 1 == beyond) { ++ int dlen = le32_to_cpu(dn->size); ++ int ilen = i_size & (UBIFS_BLOCK_SIZE - 1); ++ ++ if (ilen && ilen < dlen) ++ memset(addr + ilen, 0, dlen - ilen); ++ } ++ } ++ if (++i >= UBIFS_BLOCKS_PER_PAGE) ++ break; ++ block += 1; ++ addr += UBIFS_BLOCK_SIZE; ++ } ++ if (err) { ++ if (err == -ENOENT) { ++ /* Not found, so it must be a hole */ ++ SetPageChecked(page); ++ dbg_gen("hole"); ++ goto out_free; ++ } ++ ubifs_err("cannot read page %lu of inode %lu, error %d", ++ page->index, inode->i_ino, err); ++ goto error; ++ } ++ ++out_free: ++ kfree(dn); ++out: ++ SetPageUptodate(page); ++ ClearPageError(page); ++ flush_dcache_page(page); ++ 
kunmap(page); ++ return 0; ++ ++error: ++ kfree(dn); ++ ClearPageUptodate(page); ++ SetPageError(page); ++ flush_dcache_page(page); ++ kunmap(page); ++ return err; ++} ++ ++/** ++ * release_new_page_budget - release budget of a new page. ++ * @c: UBIFS file-system description object ++ * ++ * This is a helper function which releases budget corresponding to the budget ++ * of one new page of data. ++ */ ++static void release_new_page_budget(struct ubifs_info *c) ++{ ++ struct ubifs_budget_req req = { .recalculate = 1, .new_page = 1 }; ++ ++ ubifs_release_budget(c, &req); ++} ++ ++/** ++ * release_existing_page_budget - release budget of an existing page. ++ * @c: UBIFS file-system description object ++ * ++ * This is a helper function which releases budget corresponding to the budget ++ * of changing one one page of data which already exists on the flash media. ++ */ ++static void release_existing_page_budget(struct ubifs_info *c) ++{ ++ struct ubifs_budget_req req = { .dd_growth = c->page_budget}; ++ ++ ubifs_release_budget(c, &req); ++} ++ ++static int write_begin_slow(struct address_space *mapping, ++ loff_t pos, unsigned len, struct page **pagep) ++{ ++ struct inode *inode = mapping->host; ++ struct ubifs_info *c = inode->i_sb->s_fs_info; ++ pgoff_t index = pos >> PAGE_CACHE_SHIFT; ++ struct ubifs_budget_req req = { .new_page = 1 }; ++ int uninitialized_var(err), appending = !!(pos + len > inode->i_size); ++ struct page *page; ++ ++ dbg_gen("ino %lu, pos %llu, len %u, i_size %lld", ++ inode->i_ino, pos, len, inode->i_size); ++ ++ /* ++ * At the slow path we have to budget before locking the page, because ++ * budgeting may force write-back, which would wait on locked pages and ++ * deadlock if we had the page locked. At this point we do not know ++ * anything about the page, so assume that this is a new page which is ++ * written to a hole. This corresponds to largest budget. Later the ++ * budget will be amended if this is not true. 
++ */ ++ if (appending) ++ /* We are appending data, budget for inode change */ ++ req.dirtied_ino = 1; ++ ++ err = ubifs_budget_space(c, &req); ++ if (unlikely(err)) ++ return err; ++ ++ page = __grab_cache_page(mapping, index); ++ if (unlikely(!page)) { ++ ubifs_release_budget(c, &req); ++ return -ENOMEM; ++ } ++ ++ if (!PageUptodate(page)) { ++ if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) ++ SetPageChecked(page); ++ else { ++ err = do_readpage(page); ++ if (err) { ++ unlock_page(page); ++ page_cache_release(page); ++ return err; ++ } ++ } ++ ++ SetPageUptodate(page); ++ ClearPageError(page); ++ } ++ ++ if (PagePrivate(page)) ++ /* ++ * The page is dirty, which means it was budgeted twice: ++ * o first time the budget was allocated by the task which ++ * made the page dirty and set the PG_private flag; ++ * o and then we budgeted for it for the second time at the ++ * very beginning of this function. ++ * ++ * So what we have to do is to release the page budget we ++ * allocated. ++ */ ++ release_new_page_budget(c); ++ else if (!PageChecked(page)) ++ /* ++ * We are changing a page which already exists on the media. ++ * This means that changing the page does not make the amount ++ * of indexing information larger, and this part of the budget ++ * which we have already acquired may be released. ++ */ ++ ubifs_convert_page_budget(c); ++ ++ if (appending) { ++ struct ubifs_inode *ui = ubifs_inode(inode); ++ ++ /* ++ * 'ubifs_write_end()' is optimized from the fast-path part of ++ * 'ubifs_write_begin()' and expects the @ui_mutex to be locked ++ * if data is appended. ++ */ ++ mutex_lock(&ui->ui_mutex); ++ if (ui->dirty) ++ /* ++ * The inode is dirty already, so we may free the ++ * budget we allocated. ++ */ ++ ubifs_release_dirty_inode_budget(c, ui); ++ } ++ ++ *pagep = page; ++ return 0; ++} ++ ++/** ++ * allocate_budget - allocate budget for 'ubifs_write_begin()'. 
++ * @c: UBIFS file-system description object
++ * @page: page to allocate budget for
++ * @ui: UBIFS inode object the page belongs to
++ * @appending: non-zero if the page is appended
++ *
++ * This is a helper function for 'ubifs_write_begin()' which allocates budget
++ * for the operation. The budget is allocated differently depending on whether
++ * this is appending, whether the page is dirty or not, and so on. This
++ * function leaves the @ui->ui_mutex locked in case of appending. Returns zero
++ * in case of success and %-ENOSPC in case of failure.
++ */
++static int allocate_budget(struct ubifs_info *c, struct page *page,
++			   struct ubifs_inode *ui, int appending)
++{
++	/* The request is marked as "fast" for the fast path of the caller */
++	struct ubifs_budget_req req = { .fast = 1 };
++
++	if (PagePrivate(page)) {
++		if (!appending)
++			/*
++			 * The page is dirty and we are not appending, which
++			 * means no budget is needed at all.
++			 */
++			return 0;
++
++		mutex_lock(&ui->ui_mutex);
++		if (ui->dirty)
++			/*
++			 * The page is dirty and we are appending, so the inode
++			 * has to be marked as dirty. However, it is already
++			 * dirty, so we do not need any budget. We may return,
++			 * but @ui->ui_mutex has to be left locked because we
++			 * should prevent write-back from flushing the inode
++			 * and freeing the budget. The lock will be released in
++			 * 'ubifs_write_end()'.
++			 */
++			return 0;
++
++		/*
++		 * The page is dirty, we are appending, the inode is clean, so
++		 * we need to budget the inode change.
++		 */
++		req.dirtied_ino = 1;
++	} else {
++		if (PageChecked(page))
++			/*
++			 * The page corresponds to a hole and does not
++			 * exist on the media. So changing it makes
++			 * the amount of indexing information
++			 * larger, and we have to budget for a new
++			 * page.
++			 */
++			req.new_page = 1;
++		else
++			/*
++			 * Not a hole, the change will not add any new
++			 * indexing information, budget for page
++			 * change.
++			 */
++			req.dirtied_page = 1;
++
++		if (appending) {
++			mutex_lock(&ui->ui_mutex);
++			if (!ui->dirty)
++				/*
++				 * The inode is clean but we will have to mark
++				 * it as dirty because we are appending. This
++				 * needs a budget.
++				 */
++				req.dirtied_ino = 1;
++		}
++	}
++
++	/*
++	 * Note, if appending, @ui->ui_mutex stays locked on return regardless
++	 * of whether budgeting succeeds - the caller has to unlock it.
++	 */
++	return ubifs_budget_space(c, &req);
++}
++
++/*
++ * This function is called when a page of data is going to be written. Since
++ * the page of data will not necessarily go to the flash straight away, UBIFS
++ * has to reserve space on the media for it, which is done by means of
++ * budgeting.
++ *
++ * This is the hot-path of the file-system and we are trying to optimize it as
++ * much as possible. For this reasons it is split on 2 parts - slow and fast.
++ *
++ * There many budgeting cases:
++ *   o a new page is appended - we have to budget for a new page and for
++ *     changing the inode; however, if the inode is already dirty, there is
++ *     no need to budget for it;
++ *   o an existing clean page is changed - we have budget for it; if the page
++ *     does not exist on the media (a hole), we have to budget for a new
++ *     page; otherwise, we may budget for changing an existing page; the
++ *     difference between these cases is that changing an existing page does
++ *     not introduce anything new to the FS indexing information, so it does
++ *     not grow, and smaller budget is acquired in this case;
++ *   o an existing dirty page is changed - no need to budget at all, because
++ *     the page budget has been acquired by earlier, when the page has been
++ *     marked dirty.
++ *
++ * UBIFS budgeting sub-system may force write-back if it thinks there is no
++ * space to reserve. This imposes some locking restrictions and makes it
++ * impossible to take into account the above cases, and makes it impossible to
++ * optimize budgeting.
++ *
++ * The solution for this is that the fast path of 'ubifs_write_begin()' assumes
++ * there is a plenty of flash space and the budget will be acquired quickly,
++ * without forcing write-back. The slow path does not make this assumption.
++ */
++static int ubifs_write_begin(struct file *file, struct address_space *mapping,
++			     loff_t pos, unsigned len, unsigned flags,
++			     struct page **pagep, void **fsdata)
++{
++	struct inode *inode = mapping->host;
++	struct ubifs_info *c = inode->i_sb->s_fs_info;
++	struct ubifs_inode *ui = ubifs_inode(inode);
++	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
++	/* Non-zero if the write extends beyond the current inode size */
++	int uninitialized_var(err), appending = !!(pos + len > inode->i_size);
++	int skipped_read = 0;
++	struct page *page;
++
++	ubifs_assert(ubifs_inode(inode)->ui_size == inode->i_size);
++
++	if (unlikely(c->ro_media))
++		return -EROFS;
++
++	/* Try out the fast-path part first */
++	page = __grab_cache_page(mapping, index);
++	if (unlikely(!page))
++		return -ENOMEM;
++
++	if (!PageUptodate(page)) {
++		/* The page is not loaded from the flash */
++		if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) {
++			/*
++			 * We change whole page so no need to load it. But we
++			 * have to set the @PG_checked flag to make the further
++			 * code think the page is new. This might be not true,
++			 * but it is better to budget more than to read the
++			 * page from the media.
++			 */
++			SetPageChecked(page);
++			skipped_read = 1;
++		} else {
++			err = do_readpage(page);
++			if (err) {
++				unlock_page(page);
++				page_cache_release(page);
++				return err;
++			}
++		}
++
++		SetPageUptodate(page);
++		ClearPageError(page);
++	}
++
++	err = allocate_budget(c, page, ui, appending);
++	if (unlikely(err)) {
++		ubifs_assert(err == -ENOSPC);
++		/*
++		 * If we skipped reading the page because we were going to
++		 * write all of it, then it is not up to date.
++		 */
++		if (skipped_read) {
++			ClearPageChecked(page);
++			ClearPageUptodate(page);
++		}
++		/*
++		 * Budgeting failed which means it would have to force
++		 * write-back but didn't, because we set the @fast flag in the
++		 * request. Write-back cannot be done now, while we have the
++		 * page locked, because it would deadlock. Unlock and free
++		 * everything and fall-back to slow-path.
++		 */
++		if (appending) {
++			ubifs_assert(mutex_is_locked(&ui->ui_mutex));
++			mutex_unlock(&ui->ui_mutex);
++		}
++		unlock_page(page);
++		page_cache_release(page);
++
++		return write_begin_slow(mapping, pos, len, pagep);
++	}
++
++	/*
++	 * Whee, we acquired budgeting quickly - without involving
++	 * garbage-collection, committing or forcing write-back. We return
++	 * with @ui->ui_mutex locked if we are appending pages, and unlocked
++	 * otherwise. This is an optimization (slightly hacky though).
++	 */
++	*pagep = page;
++	return 0;
++
++}
++
++/**
++ * cancel_budget - cancel budget.
++ * @c: UBIFS file-system description object
++ * @page: page to cancel budget for
++ * @ui: UBIFS inode object the page belongs to
++ * @appending: non-zero if the page is appended
++ *
++ * This is a helper function for a page write operation. It unlocks the
++ * @ui->ui_mutex in case of appending.
++ */ ++static void cancel_budget(struct ubifs_info *c, struct page *page, ++ struct ubifs_inode *ui, int appending) ++{ ++ if (appending) { ++ if (!ui->dirty) ++ ubifs_release_dirty_inode_budget(c, ui); ++ mutex_unlock(&ui->ui_mutex); ++ } ++ if (!PagePrivate(page)) { ++ if (PageChecked(page)) ++ release_new_page_budget(c); ++ else ++ release_existing_page_budget(c); ++ } ++} ++ ++static int ubifs_write_end(struct file *file, struct address_space *mapping, ++ loff_t pos, unsigned len, unsigned copied, ++ struct page *page, void *fsdata) ++{ ++ struct inode *inode = mapping->host; ++ struct ubifs_inode *ui = ubifs_inode(inode); ++ struct ubifs_info *c = inode->i_sb->s_fs_info; ++ loff_t end_pos = pos + len; ++ int appending = !!(end_pos > inode->i_size); ++ ++ dbg_gen("ino %lu, pos %llu, pg %lu, len %u, copied %d, i_size %lld", ++ inode->i_ino, pos, page->index, len, copied, inode->i_size); ++ ++ if (unlikely(copied < len && len == PAGE_CACHE_SIZE)) { ++ /* ++ * VFS copied less data to the page that it intended and ++ * declared in its '->write_begin()' call via the @len ++ * argument. If the page was not up-to-date, and @len was ++ * @PAGE_CACHE_SIZE, the 'ubifs_write_begin()' function did ++ * not load it from the media (for optimization reasons). This ++ * means that part of the page contains garbage. So read the ++ * page now. ++ */ ++ dbg_gen("copied %d instead of %d, read page and repeat", ++ copied, len); ++ cancel_budget(c, page, ui, appending); ++ ++ /* ++ * Return 0 to force VFS to repeat the whole operation, or the ++ * error code if 'do_readpage()' failes. 
++ */ ++ copied = do_readpage(page); ++ goto out; ++ } ++ ++ if (!PagePrivate(page)) { ++ SetPagePrivate(page); ++ atomic_long_inc(&c->dirty_pg_cnt); ++ __set_page_dirty_nobuffers(page); ++ } ++ ++ if (appending) { ++ i_size_write(inode, end_pos); ++ ui->ui_size = end_pos; ++ /* ++ * Note, we do not set @I_DIRTY_PAGES (which means that the ++ * inode has dirty pages), this has been done in ++ * '__set_page_dirty_nobuffers()'. ++ */ ++ __mark_inode_dirty(inode, I_DIRTY_DATASYNC); ++ ubifs_assert(mutex_is_locked(&ui->ui_mutex)); ++ mutex_unlock(&ui->ui_mutex); ++ } ++ ++out: ++ unlock_page(page); ++ page_cache_release(page); ++ return copied; ++} ++ ++/** ++ * populate_page - copy data nodes into a page for bulk-read. ++ * @c: UBIFS file-system description object ++ * @page: page ++ * @bu: bulk-read information ++ * @n: next zbranch slot ++ * ++ * This function returns %0 on success and a negative error code on failure. ++ */ ++static int populate_page(struct ubifs_info *c, struct page *page, ++ struct bu_info *bu, int *n) ++{ ++ int i = 0, nn = *n, offs = bu->zbranch[0].offs, hole = 0, read = 0; ++ struct inode *inode = page->mapping->host; ++ loff_t i_size = i_size_read(inode); ++ unsigned int page_block; ++ void *addr, *zaddr; ++ pgoff_t end_index; ++ ++ dbg_gen("ino %lu, pg %lu, i_size %lld, flags %#lx", ++ inode->i_ino, page->index, i_size, page->flags); ++ ++ addr = zaddr = kmap(page); ++ ++ end_index = (i_size - 1) >> PAGE_CACHE_SHIFT; ++ if (!i_size || page->index > end_index) { ++ hole = 1; ++ memset(addr, 0, PAGE_CACHE_SIZE); ++ goto out_hole; ++ } ++ ++ page_block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT; ++ while (1) { ++ int err, len, out_len, dlen; ++ ++ if (nn >= bu->cnt) { ++ hole = 1; ++ memset(addr, 0, UBIFS_BLOCK_SIZE); ++ } else if (key_block(c, &bu->zbranch[nn].key) == page_block) { ++ struct ubifs_data_node *dn; ++ ++ dn = bu->buf + (bu->zbranch[nn].offs - offs); ++ ++ ubifs_assert(le64_to_cpu(dn->ch.sqnum) > ++ 
ubifs_inode(inode)->creat_sqnum); ++ ++ len = le32_to_cpu(dn->size); ++ if (len <= 0 || len > UBIFS_BLOCK_SIZE) ++ goto out_err; ++ ++ dlen = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ; ++ out_len = UBIFS_BLOCK_SIZE; ++ err = ubifs_decompress(&dn->data, dlen, addr, &out_len, ++ le16_to_cpu(dn->compr_type)); ++ if (err || len != out_len) ++ goto out_err; ++ ++ if (len < UBIFS_BLOCK_SIZE) ++ memset(addr + len, 0, UBIFS_BLOCK_SIZE - len); ++ ++ nn += 1; ++ read = (i << UBIFS_BLOCK_SHIFT) + len; ++ } else if (key_block(c, &bu->zbranch[nn].key) < page_block) { ++ nn += 1; ++ continue; ++ } else { ++ hole = 1; ++ memset(addr, 0, UBIFS_BLOCK_SIZE); ++ } ++ if (++i >= UBIFS_BLOCKS_PER_PAGE) ++ break; ++ addr += UBIFS_BLOCK_SIZE; ++ page_block += 1; ++ } ++ ++ if (end_index == page->index) { ++ int len = i_size & (PAGE_CACHE_SIZE - 1); ++ ++ if (len && len < read) ++ memset(zaddr + len, 0, read - len); ++ } ++ ++out_hole: ++ if (hole) { ++ SetPageChecked(page); ++ dbg_gen("hole"); ++ } ++ ++ SetPageUptodate(page); ++ ClearPageError(page); ++ flush_dcache_page(page); ++ kunmap(page); ++ *n = nn; ++ return 0; ++ ++out_err: ++ ClearPageUptodate(page); ++ SetPageError(page); ++ flush_dcache_page(page); ++ kunmap(page); ++ ubifs_err("bad data node (block %u, inode %lu)", ++ page_block, inode->i_ino); ++ return -EINVAL; ++} ++ ++/** ++ * ubifs_do_bulk_read - do bulk-read. ++ * @c: UBIFS file-system description object ++ * @bu: bulk-read information ++ * @page1: first page to read ++ * ++ * This function returns %1 if the bulk-read is done, otherwise %0 is returned. ++ */ ++static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu, ++ struct page *page1) ++{ ++ pgoff_t offset = page1->index, end_index; ++ struct address_space *mapping = page1->mapping; ++ struct inode *inode = mapping->host; ++ struct ubifs_inode *ui = ubifs_inode(inode); ++ int err, page_idx, page_cnt, ret = 0, n = 0; ++ int allocate = bu->buf ? 
0 : 1; ++ loff_t isize; ++ ++ err = ubifs_tnc_get_bu_keys(c, bu); ++ if (err) ++ goto out_warn; ++ ++ if (bu->eof) { ++ /* Turn off bulk-read at the end of the file */ ++ ui->read_in_a_row = 1; ++ ui->bulk_read = 0; ++ } ++ ++ page_cnt = bu->blk_cnt >> UBIFS_BLOCKS_PER_PAGE_SHIFT; ++ if (!page_cnt) { ++ /* ++ * This happens when there are multiple blocks per page and the ++ * blocks for the first page we are looking for, are not ++ * together. If all the pages were like this, bulk-read would ++ * reduce performance, so we turn it off for a while. ++ */ ++ goto out_bu_off; ++ } ++ ++ if (bu->cnt) { ++ if (allocate) { ++ /* ++ * Allocate bulk-read buffer depending on how many data ++ * nodes we are going to read. ++ */ ++ bu->buf_len = bu->zbranch[bu->cnt - 1].offs + ++ bu->zbranch[bu->cnt - 1].len - ++ bu->zbranch[0].offs; ++ ubifs_assert(bu->buf_len > 0); ++ ubifs_assert(bu->buf_len <= c->leb_size); ++ bu->buf = kmalloc(bu->buf_len, GFP_NOFS | __GFP_NOWARN); ++ if (!bu->buf) ++ goto out_bu_off; ++ } ++ ++ err = ubifs_tnc_bulk_read(c, bu); ++ if (err) ++ goto out_warn; ++ } ++ ++ err = populate_page(c, page1, bu, &n); ++ if (err) ++ goto out_warn; ++ ++ unlock_page(page1); ++ ret = 1; ++ ++ isize = i_size_read(inode); ++ if (isize == 0) ++ goto out_free; ++ end_index = ((isize - 1) >> PAGE_CACHE_SHIFT); ++ ++ for (page_idx = 1; page_idx < page_cnt; page_idx++) { ++ pgoff_t page_offset = offset + page_idx; ++ struct page *page; ++ ++ if (page_offset > end_index) ++ break; ++ page = find_or_create_page(mapping, page_offset, ++ GFP_NOFS | __GFP_COLD); ++ if (!page) ++ break; ++ if (!PageUptodate(page)) ++ err = populate_page(c, page, bu, &n); ++ unlock_page(page); ++ page_cache_release(page); ++ if (err) ++ break; ++ } ++ ++ ui->last_page_read = offset + page_idx - 1; ++ ++out_free: ++ if (allocate) ++ kfree(bu->buf); ++ return ret; ++ ++out_warn: ++ ubifs_warn("ignoring error %d and skipping bulk-read", err); ++ goto out_free; ++ ++out_bu_off: ++ ui->read_in_a_row = 
ui->bulk_read = 0; ++ goto out_free; ++} ++ ++/** ++ * ubifs_bulk_read - determine whether to bulk-read and, if so, do it. ++ * @page: page from which to start bulk-read. ++ * ++ * Some flash media are capable of reading sequentially at faster rates. UBIFS ++ * bulk-read facility is designed to take advantage of that, by reading in one ++ * go consecutive data nodes that are also located consecutively in the same ++ * LEB. This function returns %1 if a bulk-read is done and %0 otherwise. ++ */ ++static int ubifs_bulk_read(struct page *page) ++{ ++ struct inode *inode = page->mapping->host; ++ struct ubifs_info *c = inode->i_sb->s_fs_info; ++ struct ubifs_inode *ui = ubifs_inode(inode); ++ pgoff_t index = page->index, last_page_read = ui->last_page_read; ++ struct bu_info *bu; ++ int err = 0, allocated = 0; ++ ++ ui->last_page_read = index; ++ if (!c->bulk_read) ++ return 0; ++ ++ /* ++ * Bulk-read is protected by @ui->ui_mutex, but it is an optimization, ++ * so don't bother if we cannot lock the mutex. ++ */ ++ if (!mutex_trylock(&ui->ui_mutex)) ++ return 0; ++ ++ if (index != last_page_read + 1) { ++ /* Turn off bulk-read if we stop reading sequentially */ ++ ui->read_in_a_row = 1; ++ if (ui->bulk_read) ++ ui->bulk_read = 0; ++ goto out_unlock; ++ } ++ ++ if (!ui->bulk_read) { ++ ui->read_in_a_row += 1; ++ if (ui->read_in_a_row < 3) ++ goto out_unlock; ++ /* Three reads in a row, so switch on bulk-read */ ++ ui->bulk_read = 1; ++ } ++ ++ /* ++ * If possible, try to use pre-allocated bulk-read information, which ++ * is protected by @c->bu_mutex. 
++ */ ++ if (mutex_trylock(&c->bu_mutex)) ++ bu = &c->bu; ++ else { ++ bu = kmalloc(sizeof(struct bu_info), GFP_NOFS | __GFP_NOWARN); ++ if (!bu) ++ goto out_unlock; ++ ++ bu->buf = NULL; ++ allocated = 1; ++ } ++ ++ bu->buf_len = c->max_bu_buf_len; ++ data_key_init(c, &bu->key, inode->i_ino, ++ page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT); ++ err = ubifs_do_bulk_read(c, bu, page); ++ ++ if (!allocated) ++ mutex_unlock(&c->bu_mutex); ++ else ++ kfree(bu); ++ ++out_unlock: ++ mutex_unlock(&ui->ui_mutex); ++ return err; ++} ++ ++static int ubifs_readpage(struct file *file, struct page *page) ++{ ++ if (ubifs_bulk_read(page)) ++ return 0; ++ do_readpage(page); ++ unlock_page(page); ++ return 0; ++} ++ ++static int do_writepage(struct page *page, int len) ++{ ++ int err = 0, i, blen; ++ unsigned int block; ++ void *addr; ++ union ubifs_key key; ++ struct inode *inode = page->mapping->host; ++ struct ubifs_info *c = inode->i_sb->s_fs_info; ++ ++#ifdef UBIFS_DEBUG ++ spin_lock(&ui->ui_lock); ++ ubifs_assert(page->index <= ui->synced_i_size << PAGE_CACHE_SIZE); ++ spin_unlock(&ui->ui_lock); ++#endif ++ ++ /* Update radix tree tags */ ++ set_page_writeback(page); ++ ++ addr = kmap(page); ++ block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT; ++ i = 0; ++ while (len) { ++ blen = min_t(int, len, UBIFS_BLOCK_SIZE); ++ data_key_init(c, &key, inode->i_ino, block); ++ err = ubifs_jnl_write_data(c, inode, &key, addr, blen); ++ if (err) ++ break; ++ if (++i >= UBIFS_BLOCKS_PER_PAGE) ++ break; ++ block += 1; ++ addr += blen; ++ len -= blen; ++ } ++ if (err) { ++ SetPageError(page); ++ ubifs_err("cannot write page %lu of inode %lu, error %d", ++ page->index, inode->i_ino, err); ++ ubifs_ro_mode(c, err); ++ } ++ ++ ubifs_assert(PagePrivate(page)); ++ if (PageChecked(page)) ++ release_new_page_budget(c); ++ else ++ release_existing_page_budget(c); ++ ++ atomic_long_dec(&c->dirty_pg_cnt); ++ ClearPagePrivate(page); ++ ClearPageChecked(page); ++ ++ kunmap(page); ++ unlock_page(page); ++ 
end_page_writeback(page); ++ return err; ++} ++ ++/* ++ * When writing-back dirty inodes, VFS first writes-back pages belonging to the ++ * inode, then the inode itself. For UBIFS this may cause a problem. Consider a ++ * situation when a we have an inode with size 0, then a megabyte of data is ++ * appended to the inode, then write-back starts and flushes some amount of the ++ * dirty pages, the journal becomes full, commit happens and finishes, and then ++ * an unclean reboot happens. When the file system is mounted next time, the ++ * inode size would still be 0, but there would be many pages which are beyond ++ * the inode size, they would be indexed and consume flash space. Because the ++ * journal has been committed, the replay would not be able to detect this ++ * situation and correct the inode size. This means UBIFS would have to scan ++ * whole index and correct all inode sizes, which is long an unacceptable. ++ * ++ * To prevent situations like this, UBIFS writes pages back only if they are ++ * within the last synchronized inode size, i.e. the size which has been ++ * written to the flash media last time. Otherwise, UBIFS forces inode ++ * write-back, thus making sure the on-flash inode contains current inode size, ++ * and then keeps writing pages back. ++ * ++ * Some locking issues explanation. 'ubifs_writepage()' first is called with ++ * the page locked, and it locks @ui_mutex. However, write-back does take inode ++ * @i_mutex, which means other VFS operations may be run on this inode at the ++ * same time. And the problematic one is truncation to smaller size, from where ++ * we have to call 'vmtruncate()', which first changes @inode->i_size, then ++ * drops the truncated pages. And while dropping the pages, it takes the page ++ * lock. This means that 'do_truncation()' cannot call 'vmtruncate()' with ++ * @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. 
This ++ * means that @inode->i_size is changed while @ui_mutex is unlocked. ++ * ++ * But in 'ubifs_writepage()' we have to guarantee that we do not write beyond ++ * inode size. How do we do this if @inode->i_size may became smaller while we ++ * are in the middle of 'ubifs_writepage()'? The UBIFS solution is the ++ * @ui->ui_isize "shadow" field which UBIFS uses instead of @inode->i_size ++ * internally and updates it under @ui_mutex. ++ * ++ * Q: why we do not worry that if we race with truncation, we may end up with a ++ * situation when the inode is truncated while we are in the middle of ++ * 'do_writepage()', so we do write beyond inode size? ++ * A: If we are in the middle of 'do_writepage()', truncation would be locked ++ * on the page lock and it would not write the truncated inode node to the ++ * journal before we have finished. ++ */ ++static int ubifs_writepage(struct page *page, struct writeback_control *wbc) ++{ ++ struct inode *inode = page->mapping->host; ++ struct ubifs_inode *ui = ubifs_inode(inode); ++ loff_t i_size = i_size_read(inode), synced_i_size; ++ pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; ++ int err, len = i_size & (PAGE_CACHE_SIZE - 1); ++ void *kaddr; ++ ++ dbg_gen("ino %lu, pg %lu, pg flags %#lx", ++ inode->i_ino, page->index, page->flags); ++ ubifs_assert(PagePrivate(page)); ++ ++ /* Is the page fully outside @i_size? (truncate in progress) */ ++ if (page->index > end_index || (page->index == end_index && !len)) { ++ err = 0; ++ goto out_unlock; ++ } ++ ++ spin_lock(&ui->ui_lock); ++ synced_i_size = ui->synced_i_size; ++ spin_unlock(&ui->ui_lock); ++ ++ /* Is the page fully inside @i_size? 
*/ ++ if (page->index < end_index) { ++ if (page->index >= synced_i_size >> PAGE_CACHE_SHIFT) { ++ err = inode->i_sb->s_op->write_inode(inode, 1); ++ if (err) ++ goto out_unlock; ++ /* ++ * The inode has been written, but the write-buffer has ++ * not been synchronized, so in case of an unclean ++ * reboot we may end up with some pages beyond inode ++ * size, but they would be in the journal (because ++ * commit flushes write buffers) and recovery would deal ++ * with this. ++ */ ++ } ++ return do_writepage(page, PAGE_CACHE_SIZE); ++ } ++ ++ /* ++ * The page straddles @i_size. It must be zeroed out on each and every ++ * writepage invocation because it may be mmapped. "A file is mapped ++ * in multiples of the page size. For a file that is not a multiple of ++ * the page size, the remaining memory is zeroed when mapped, and ++ * writes to that region are not written out to the file." ++ */ ++ kaddr = kmap_atomic(page, KM_USER0); ++ memset(kaddr + len, 0, PAGE_CACHE_SIZE - len); ++ flush_dcache_page(page); ++ kunmap_atomic(kaddr, KM_USER0); ++ ++ if (i_size > synced_i_size) { ++ err = inode->i_sb->s_op->write_inode(inode, 1); ++ if (err) ++ goto out_unlock; ++ } ++ ++ return do_writepage(page, len); ++ ++out_unlock: ++ unlock_page(page); ++ return err; ++} ++ ++/** ++ * do_attr_changes - change inode attributes. 
++ * @inode: inode to change attributes for ++ * @attr: describes attributes to change ++ */ ++static void do_attr_changes(struct inode *inode, const struct iattr *attr) ++{ ++ if (attr->ia_valid & ATTR_UID) ++ inode->i_uid = attr->ia_uid; ++ if (attr->ia_valid & ATTR_GID) ++ inode->i_gid = attr->ia_gid; ++ if (attr->ia_valid & ATTR_ATIME) ++ inode->i_atime = timespec_trunc(attr->ia_atime, ++ inode->i_sb->s_time_gran); ++ if (attr->ia_valid & ATTR_MTIME) ++ inode->i_mtime = timespec_trunc(attr->ia_mtime, ++ inode->i_sb->s_time_gran); ++ if (attr->ia_valid & ATTR_CTIME) ++ inode->i_ctime = timespec_trunc(attr->ia_ctime, ++ inode->i_sb->s_time_gran); ++ if (attr->ia_valid & ATTR_MODE) { ++ umode_t mode = attr->ia_mode; ++ ++ if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) ++ mode &= ~S_ISGID; ++ inode->i_mode = mode; ++ } ++} ++ ++/** ++ * do_truncation - truncate an inode. ++ * @c: UBIFS file-system description object ++ * @inode: inode to truncate ++ * @attr: inode attribute changes description ++ * ++ * This function implements VFS '->setattr()' call when the inode is truncated ++ * to a smaller size. Returns zero in case of success and a negative error code ++ * in case of failure. ++ */ ++static int do_truncation(struct ubifs_info *c, struct inode *inode, ++ const struct iattr *attr) ++{ ++ int err; ++ struct ubifs_budget_req req; ++ loff_t old_size = inode->i_size, new_size = attr->ia_size; ++ int offset = new_size & (UBIFS_BLOCK_SIZE - 1), budgeted = 1; ++ struct ubifs_inode *ui = ubifs_inode(inode); ++ ++ dbg_gen("ino %lu, size %lld -> %lld", inode->i_ino, old_size, new_size); ++ memset(&req, 0, sizeof(struct ubifs_budget_req)); ++ ++ /* ++ * If this is truncation to a smaller size, and we do not truncate on a ++ * block boundary, budget for changing one data block, because the last ++ * block will be re-written. 
++ */ ++ if (new_size & (UBIFS_BLOCK_SIZE - 1)) ++ req.dirtied_page = 1; ++ ++ req.dirtied_ino = 1; ++ /* A funny way to budget for truncation node */ ++ req.dirtied_ino_d = UBIFS_TRUN_NODE_SZ; ++ err = ubifs_budget_space(c, &req); ++ if (err) { ++ /* ++ * Treat truncations to zero as deletion and always allow them, ++ * just like we do for '->unlink()'. ++ */ ++ if (new_size || err != -ENOSPC) ++ return err; ++ budgeted = 0; ++ } ++ ++ err = vmtruncate(inode, new_size); ++ if (err) ++ goto out_budg; ++ ++ if (offset) { ++ pgoff_t index = new_size >> PAGE_CACHE_SHIFT; ++ struct page *page; ++ ++ page = find_lock_page(inode->i_mapping, index); ++ if (page) { ++ if (PageDirty(page)) { ++ /* ++ * 'ubifs_jnl_truncate()' will try to truncate ++ * the last data node, but it contains ++ * out-of-date data because the page is dirty. ++ * Write the page now, so that ++ * 'ubifs_jnl_truncate()' will see an already ++ * truncated (and up to date) data node. ++ */ ++ ubifs_assert(PagePrivate(page)); ++ ++ clear_page_dirty_for_io(page); ++ if (UBIFS_BLOCKS_PER_PAGE_SHIFT) ++ offset = new_size & ++ (PAGE_CACHE_SIZE - 1); ++ err = do_writepage(page, offset); ++ page_cache_release(page); ++ if (err) ++ goto out_budg; ++ /* ++ * We could now tell 'ubifs_jnl_truncate()' not ++ * to read the last block. ++ */ ++ } else { ++ /* ++ * We could 'kmap()' the page and pass the data ++ * to 'ubifs_jnl_truncate()' to save it from ++ * having to read it. 
++ */ ++ unlock_page(page); ++ page_cache_release(page); ++ } ++ } ++ } ++ ++ mutex_lock(&ui->ui_mutex); ++ ui->ui_size = inode->i_size; ++ /* Truncation changes inode [mc]time */ ++ inode->i_mtime = inode->i_ctime = ubifs_current_time(inode); ++ /* The other attributes may be changed at the same time as well */ ++ do_attr_changes(inode, attr); ++ ++ err = ubifs_jnl_truncate(c, inode, old_size, new_size); ++ mutex_unlock(&ui->ui_mutex); ++out_budg: ++ if (budgeted) ++ ubifs_release_budget(c, &req); ++ else { ++ c->nospace = c->nospace_rp = 0; ++ smp_wmb(); ++ } ++ return err; ++} ++ ++/** ++ * do_setattr - change inode attributes. ++ * @c: UBIFS file-system description object ++ * @inode: inode to change attributes for ++ * @attr: inode attribute changes description ++ * ++ * This function implements VFS '->setattr()' call for all cases except ++ * truncations to smaller size. Returns zero in case of success and a negative ++ * error code in case of failure. ++ */ ++static int do_setattr(struct ubifs_info *c, struct inode *inode, ++ const struct iattr *attr) ++{ ++ int err, release; ++ loff_t new_size = attr->ia_size; ++ struct ubifs_inode *ui = ubifs_inode(inode); ++ struct ubifs_budget_req req = { .dirtied_ino = 1, ++ .dirtied_ino_d = ALIGN(ui->data_len, 8) }; ++ ++ err = ubifs_budget_space(c, &req); ++ if (err) ++ return err; ++ ++ if (attr->ia_valid & ATTR_SIZE) { ++ dbg_gen("size %lld -> %lld", inode->i_size, new_size); ++ err = vmtruncate(inode, new_size); ++ if (err) ++ goto out; ++ } ++ ++ mutex_lock(&ui->ui_mutex); ++ if (attr->ia_valid & ATTR_SIZE) { ++ /* Truncation changes inode [mc]time */ ++ inode->i_mtime = inode->i_ctime = ubifs_current_time(inode); ++ /* 'vmtruncate()' changed @i_size, update @ui_size */ ++ ui->ui_size = inode->i_size; ++ } ++ ++ do_attr_changes(inode, attr); ++ ++ release = ui->dirty; ++ if (attr->ia_valid & ATTR_SIZE) ++ /* ++ * Inode length changed, so we have to make sure ++ * @I_DIRTY_DATASYNC is set. 
++ */ ++ __mark_inode_dirty(inode, I_DIRTY_SYNC | I_DIRTY_DATASYNC); ++ else ++ mark_inode_dirty_sync(inode); ++ mutex_unlock(&ui->ui_mutex); ++ ++ if (release) ++ ubifs_release_budget(c, &req); ++ if (IS_SYNC(inode)) ++ err = inode->i_sb->s_op->write_inode(inode, 1); ++ return err; ++ ++out: ++ ubifs_release_budget(c, &req); ++ return err; ++} ++ ++int ubifs_setattr(struct dentry *dentry, struct iattr *attr) ++{ ++ int err; ++ struct inode *inode = dentry->d_inode; ++ struct ubifs_info *c = inode->i_sb->s_fs_info; ++ ++ dbg_gen("ino %lu, mode %#x, ia_valid %#x", ++ inode->i_ino, inode->i_mode, attr->ia_valid); ++ err = inode_change_ok(inode, attr); ++ if (err) ++ return err; ++ ++ err = dbg_check_synced_i_size(inode); ++ if (err) ++ return err; ++ ++ if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size < inode->i_size) ++ /* Truncation to a smaller size */ ++ err = do_truncation(c, inode, attr); ++ else ++ err = do_setattr(c, inode, attr); ++ ++ return err; ++} ++ ++static void ubifs_invalidatepage(struct page *page, unsigned long offset) ++{ ++ struct inode *inode = page->mapping->host; ++ struct ubifs_info *c = inode->i_sb->s_fs_info; ++ ++ ubifs_assert(PagePrivate(page)); ++ if (offset) ++ /* Partial page remains dirty */ ++ return; ++ ++ if (PageChecked(page)) ++ release_new_page_budget(c); ++ else ++ release_existing_page_budget(c); ++ ++ atomic_long_dec(&c->dirty_pg_cnt); ++ ClearPagePrivate(page); ++ ClearPageChecked(page); ++} ++ ++static void *ubifs_follow_link(struct dentry *dentry, struct nameidata *nd) ++{ ++ struct ubifs_inode *ui = ubifs_inode(dentry->d_inode); ++ ++ nd_set_link(nd, ui->data); ++ return NULL; ++} ++ ++int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync) ++{ ++ struct inode *inode = dentry->d_inode; ++ struct ubifs_info *c = inode->i_sb->s_fs_info; ++ int err; ++ ++ dbg_gen("syncing inode %lu", inode->i_ino); ++ ++ /* ++ * VFS has already synchronized dirty pages for this inode. 
Synchronize ++ * the inode unless this is a 'datasync()' call. ++ */ ++ if (!datasync || (inode->i_state & I_DIRTY_DATASYNC)) { ++ err = inode->i_sb->s_op->write_inode(inode, 1); ++ if (err) ++ return err; ++ } ++ ++ /* ++ * Nodes related to this inode may still sit in a write-buffer. Flush ++ * them. ++ */ ++ err = ubifs_sync_wbufs_by_inode(c, inode); ++ if (err) ++ return err; ++ ++ return 0; ++} ++ ++/** ++ * mctime_update_needed - check if mtime or ctime update is needed. ++ * @inode: the inode to do the check for ++ * @now: current time ++ * ++ * This helper function checks if the inode mtime/ctime should be updated or ++ * not. If current values of the time-stamps are within the UBIFS inode time ++ * granularity, they are not updated. This is an optimization. ++ */ ++static inline int mctime_update_needed(struct inode *inode, ++ struct timespec *now) ++{ ++ if (!timespec_equal(&inode->i_mtime, now) || ++ !timespec_equal(&inode->i_ctime, now)) ++ return 1; ++ return 0; ++} ++ ++/** ++ * update_ctime - update mtime and ctime of an inode. ++ * @c: UBIFS file-system description object ++ * @inode: inode to update ++ * ++ * This function updates mtime and ctime of the inode if it is not equivalent to ++ * current time. Returns zero in case of success and a negative error code in ++ * case of failure. 
++ */ ++static int update_mctime(struct ubifs_info *c, struct inode *inode) ++{ ++ struct timespec now = ubifs_current_time(inode); ++ struct ubifs_inode *ui = ubifs_inode(inode); ++ ++ if (mctime_update_needed(inode, &now)) { ++ int err, release; ++ struct ubifs_budget_req req = { .dirtied_ino = 1, ++ .dirtied_ino_d = ALIGN(ui->data_len, 8) }; ++ ++ err = ubifs_budget_space(c, &req); ++ if (err) ++ return err; ++ ++ mutex_lock(&ui->ui_mutex); ++ inode->i_mtime = inode->i_ctime = ubifs_current_time(inode); ++ release = ui->dirty; ++ mark_inode_dirty_sync(inode); ++ mutex_unlock(&ui->ui_mutex); ++ if (release) ++ ubifs_release_budget(c, &req); ++ } ++ ++ return 0; ++} ++ ++static ssize_t ubifs_aio_write(struct kiocb *iocb, const struct iovec *iov, ++ unsigned long nr_segs, loff_t pos) ++{ ++ int err; ++ ssize_t ret; ++ struct inode *inode = iocb->ki_filp->f_mapping->host; ++ struct ubifs_info *c = inode->i_sb->s_fs_info; ++ ++ err = update_mctime(c, inode); ++ if (err) ++ return err; ++ ++ ret = generic_file_aio_write(iocb, iov, nr_segs, pos); ++ if (ret < 0) ++ return ret; ++ ++ if (ret > 0 && (IS_SYNC(inode) || iocb->ki_filp->f_flags & O_SYNC)) { ++ err = ubifs_sync_wbufs_by_inode(c, inode); ++ if (err) ++ return err; ++ } ++ ++ return ret; ++} ++ ++static int ubifs_set_page_dirty(struct page *page) ++{ ++ int ret; ++ ++ ret = __set_page_dirty_nobuffers(page); ++ /* ++ * An attempt to dirty a page without budgeting for it - should not ++ * happen. ++ */ ++ ubifs_assert(ret == 0); ++ return ret; ++} ++ ++static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags) ++{ ++ /* ++ * An attempt to release a dirty page without budgeting for it - should ++ * not happen. ++ */ ++ if (PageWriteback(page)) ++ return 0; ++ ubifs_assert(PagePrivate(page)); ++ ubifs_assert(0); ++ ClearPagePrivate(page); ++ ClearPageChecked(page); ++ return 1; ++} ++ ++/* ++ * mmap()d file has taken write protection fault and is being made ++ * writable. 
UBIFS must ensure page is budgeted for. ++ */ ++static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) ++{ ++ struct inode *inode = vma->vm_file->f_path.dentry->d_inode; ++ struct ubifs_info *c = inode->i_sb->s_fs_info; ++ struct timespec now = ubifs_current_time(inode); ++ struct ubifs_budget_req req = { .new_page = 1 }; ++ int err, update_time; ++ ++ dbg_gen("ino %lu, pg %lu, i_size %lld", inode->i_ino, page->index, ++ i_size_read(inode)); ++ ubifs_assert(!(inode->i_sb->s_flags & MS_RDONLY)); ++ ++ if (unlikely(c->ro_media)) ++ return -EROFS; ++ ++ /* ++ * We have not locked @page so far so we may budget for changing the ++ * page. Note, we cannot do this after we locked the page, because ++ * budgeting may cause write-back which would cause deadlock. ++ * ++ * At the moment we do not know whether the page is dirty or not, so we ++ * assume that it is not and budget for a new page. We could look at ++ * the @PG_private flag and figure this out, but we may race with write ++ * back and the page state may change by the time we lock it, so this ++ * would need additional care. We do not bother with this at the ++ * moment, although it might be good idea to do. Instead, we allocate ++ * budget for a new page and amend it later on if the page was in fact ++ * dirty. ++ * ++ * The budgeting-related logic of this function is similar to what we ++ * do in 'ubifs_write_begin()' and 'ubifs_write_end()'. Glance there ++ * for more comments. ++ */ ++ update_time = mctime_update_needed(inode, &now); ++ if (update_time) ++ /* ++ * We have to change inode time stamp which requires extra ++ * budgeting. 
++ */ ++ req.dirtied_ino = 1; ++ ++ err = ubifs_budget_space(c, &req); ++ if (unlikely(err)) { ++ if (err == -ENOSPC) ++ ubifs_warn("out of space for mmapped file " ++ "(inode number %lu)", inode->i_ino); ++ return err; ++ } ++ ++ lock_page(page); ++ if (unlikely(page->mapping != inode->i_mapping || ++ page_offset(page) > i_size_read(inode))) { ++ /* Page got truncated out from underneath us */ ++ err = -EINVAL; ++ goto out_unlock; ++ } ++ ++ if (PagePrivate(page)) ++ release_new_page_budget(c); ++ else { ++ if (!PageChecked(page)) ++ ubifs_convert_page_budget(c); ++ SetPagePrivate(page); ++ atomic_long_inc(&c->dirty_pg_cnt); ++ __set_page_dirty_nobuffers(page); ++ } ++ ++ if (update_time) { ++ int release; ++ struct ubifs_inode *ui = ubifs_inode(inode); ++ ++ mutex_lock(&ui->ui_mutex); ++ inode->i_mtime = inode->i_ctime = ubifs_current_time(inode); ++ release = ui->dirty; ++ mark_inode_dirty_sync(inode); ++ mutex_unlock(&ui->ui_mutex); ++ if (release) ++ ubifs_release_dirty_inode_budget(c, ui); ++ } ++ ++ unlock_page(page); ++ return 0; ++ ++out_unlock: ++ unlock_page(page); ++ ubifs_release_budget(c, &req); ++ return err; ++} ++ ++static struct vm_operations_struct ubifs_file_vm_ops = { ++ .fault = filemap_fault, ++ .page_mkwrite = ubifs_vm_page_mkwrite, ++}; ++ ++static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma) ++{ ++ int err; ++ ++ /* 'generic_file_mmap()' takes care of NOMMU case */ ++ err = generic_file_mmap(file, vma); ++ if (err) ++ return err; ++ vma->vm_ops = &ubifs_file_vm_ops; ++ return 0; ++} ++ ++const struct address_space_operations ubifs_file_address_operations = { ++ .readpage = ubifs_readpage, ++ .writepage = ubifs_writepage, ++ .write_begin = ubifs_write_begin, ++ .write_end = ubifs_write_end, ++ .invalidatepage = ubifs_invalidatepage, ++ .set_page_dirty = ubifs_set_page_dirty, ++ .releasepage = ubifs_releasepage, ++}; ++ ++const struct inode_operations ubifs_file_inode_operations = { ++ .setattr = ubifs_setattr, ++ 
.getattr = ubifs_getattr, ++#ifdef CONFIG_UBIFS_FS_XATTR ++ .setxattr = ubifs_setxattr, ++ .getxattr = ubifs_getxattr, ++ .listxattr = ubifs_listxattr, ++ .removexattr = ubifs_removexattr, ++#endif ++}; ++ ++const struct inode_operations ubifs_symlink_inode_operations = { ++ .readlink = generic_readlink, ++ .follow_link = ubifs_follow_link, ++ .setattr = ubifs_setattr, ++ .getattr = ubifs_getattr, ++}; ++ ++const struct file_operations ubifs_file_operations = { ++ .llseek = generic_file_llseek, ++ .read = do_sync_read, ++ .write = do_sync_write, ++ .aio_read = generic_file_aio_read, ++ .aio_write = ubifs_aio_write, ++ .mmap = ubifs_file_mmap, ++ .fsync = ubifs_fsync, ++ .unlocked_ioctl = ubifs_ioctl, ++ .splice_read = generic_file_splice_read, ++ .splice_write = generic_file_splice_write, ++#ifdef CONFIG_COMPAT ++ .compat_ioctl = ubifs_compat_ioctl, ++#endif ++}; +diff -Nurd linux-2.6.24/fs/ubifs/find.c ubifs-v2.6.24/fs/ubifs/find.c +--- linux-2.6.24/fs/ubifs/find.c 1970-01-01 02:00:00.000000000 +0200 ++++ ubifs-v2.6.24/fs/ubifs/find.c 2009-04-07 17:14:47.000000000 +0200 +@@ -0,0 +1,977 @@ ++/* ++ * This file is part of UBIFS. ++ * ++ * Copyright (C) 2006-2008 Nokia Corporation. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published by ++ * the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. 
++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, write to the Free Software Foundation, Inc., 51 ++ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ * ++ * Authors: Artem Bityutskiy (Битюцкий Артём) ++ * Adrian Hunter ++ */ ++ ++/* ++ * This file contains functions for finding LEBs for various purposes e.g. ++ * garbage collection. In general, lprops category heaps and lists are used ++ * for fast access, falling back on scanning the LPT as a last resort. ++ */ ++ ++#include <linux/sort.h> ++#include "ubifs.h" ++ ++/** ++ * struct scan_data - data provided to scan callback functions ++ * @min_space: minimum number of bytes for which to scan ++ * @pick_free: whether it is OK to scan for empty LEBs ++ * @lnum: LEB number found is returned here ++ * @exclude_index: whether to exclude index LEBs ++ */ ++struct scan_data { ++ int min_space; ++ int pick_free; ++ int lnum; ++ int exclude_index; ++}; ++ ++/** ++ * valuable - determine whether LEB properties are valuable. ++ * @c: the UBIFS file-system description object ++ * @lprops: LEB properties ++ * ++ * This function returns %1 if the LEB properties should be added to the LEB ++ * properties tree in memory. Otherwise %0 is returned. ++ */ ++static int valuable(struct ubifs_info *c, const struct ubifs_lprops *lprops) ++{ ++ int n, cat = lprops->flags & LPROPS_CAT_MASK; ++ struct ubifs_lpt_heap *heap; ++ ++ switch (cat) { ++ case LPROPS_DIRTY: ++ case LPROPS_DIRTY_IDX: ++ case LPROPS_FREE: ++ heap = &c->lpt_heap[cat - 1]; ++ if (heap->cnt < heap->max_cnt) ++ return 1; ++ if (lprops->free + lprops->dirty >= c->dark_wm) ++ return 1; ++ return 0; ++ case LPROPS_EMPTY: ++ n = c->lst.empty_lebs + c->freeable_cnt - ++ c->lst.taken_empty_lebs; ++ if (n < c->lsave_cnt) ++ return 1; ++ return 0; ++ case LPROPS_FREEABLE: ++ return 1; ++ case LPROPS_FRDI_IDX: ++ return 1; ++ } ++ return 0; ++} ++ ++/** ++ * scan_for_dirty_cb - dirty space scan callback.
++ * @c: the UBIFS file-system description object ++ * @lprops: LEB properties to scan ++ * @in_tree: whether the LEB properties are in main memory ++ * @data: information passed to and from the caller of the scan ++ * ++ * This function returns a code that indicates whether the scan should continue ++ * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree ++ * in main memory (%LPT_SCAN_ADD), or whether the scan should stop ++ * (%LPT_SCAN_STOP). ++ */ ++static int scan_for_dirty_cb(struct ubifs_info *c, ++ const struct ubifs_lprops *lprops, int in_tree, ++ struct scan_data *data) ++{ ++ int ret = LPT_SCAN_CONTINUE; ++ ++ /* Exclude LEBs that are currently in use */ ++ if (lprops->flags & LPROPS_TAKEN) ++ return LPT_SCAN_CONTINUE; ++ /* Determine whether to add these LEB properties to the tree */ ++ if (!in_tree && valuable(c, lprops)) ++ ret |= LPT_SCAN_ADD; ++ /* Exclude LEBs with too little space */ ++ if (lprops->free + lprops->dirty < data->min_space) ++ return ret; ++ /* If specified, exclude index LEBs */ ++ if (data->exclude_index && lprops->flags & LPROPS_INDEX) ++ return ret; ++ /* If specified, exclude empty or freeable LEBs */ ++ if (lprops->free + lprops->dirty == c->leb_size) { ++ if (!data->pick_free) ++ return ret; ++ /* Exclude LEBs with too little dirty space (unless it is empty) */ ++ } else if (lprops->dirty < c->dead_wm) ++ return ret; ++ /* Finally we found space */ ++ data->lnum = lprops->lnum; ++ return LPT_SCAN_ADD | LPT_SCAN_STOP; ++} ++ ++/** ++ * scan_for_dirty - find a data LEB with free space. ++ * @c: the UBIFS file-system description object ++ * @min_space: minimum amount free plus dirty space the returned LEB has to ++ * have ++ * @pick_free: if it is OK to return a free or freeable LEB ++ * @exclude_index: whether to exclude index LEBs ++ * ++ * This function returns a pointer to the LEB properties found or a negative ++ * error code. 
++ */ ++static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c, ++ int min_space, int pick_free, ++ int exclude_index) ++{ ++ const struct ubifs_lprops *lprops; ++ struct ubifs_lpt_heap *heap; ++ struct scan_data data; ++ int err, i; ++ ++ /* There may be an LEB with enough dirty space on the free heap */ ++ heap = &c->lpt_heap[LPROPS_FREE - 1]; ++ for (i = 0; i < heap->cnt; i++) { ++ lprops = heap->arr[i]; ++ if (lprops->free + lprops->dirty < min_space) ++ continue; ++ if (lprops->dirty < c->dead_wm) ++ continue; ++ return lprops; ++ } ++ /* ++ * A LEB may have fallen off of the bottom of the dirty heap, and ended ++ * up as uncategorized even though it has enough dirty space for us now, ++ * so check the uncategorized list. N.B. neither empty nor freeable LEBs ++ * can end up as uncategorized because they are kept on lists not ++ * finite-sized heaps. ++ */ ++ list_for_each_entry(lprops, &c->uncat_list, list) { ++ if (lprops->flags & LPROPS_TAKEN) ++ continue; ++ if (lprops->free + lprops->dirty < min_space) ++ continue; ++ if (exclude_index && (lprops->flags & LPROPS_INDEX)) ++ continue; ++ if (lprops->dirty < c->dead_wm) ++ continue; ++ return lprops; ++ } ++ /* We have looked everywhere in main memory, now scan the flash */ ++ if (c->pnodes_have >= c->pnode_cnt) ++ /* All pnodes are in memory, so skip scan */ ++ return ERR_PTR(-ENOSPC); ++ data.min_space = min_space; ++ data.pick_free = pick_free; ++ data.lnum = -1; ++ data.exclude_index = exclude_index; ++ err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, ++ (ubifs_lpt_scan_callback)scan_for_dirty_cb, ++ &data); ++ if (err) ++ return ERR_PTR(err); ++ ubifs_assert(data.lnum >= c->main_first && data.lnum < c->leb_cnt); ++ c->lscan_lnum = data.lnum; ++ lprops = ubifs_lpt_lookup_dirty(c, data.lnum); ++ if (IS_ERR(lprops)) ++ return lprops; ++ ubifs_assert(lprops->lnum == data.lnum); ++ ubifs_assert(lprops->free + lprops->dirty >= min_space); ++ ubifs_assert(lprops->dirty >= c->dead_wm || ++ 
(pick_free && ++ lprops->free + lprops->dirty == c->leb_size)); ++ ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); ++ ubifs_assert(!exclude_index || !(lprops->flags & LPROPS_INDEX)); ++ return lprops; ++} ++ ++/** ++ * ubifs_find_dirty_leb - find a dirty LEB for the Garbage Collector. ++ * @c: the UBIFS file-system description object ++ * @ret_lp: LEB properties are returned here on exit ++ * @min_space: minimum amount free plus dirty space the returned LEB has to ++ * have ++ * @pick_free: controls whether it is OK to pick empty or index LEBs ++ * ++ * This function tries to find a dirty logical eraseblock which has at least ++ * @min_space free and dirty space. It prefers to take an LEB from the dirty or ++ * dirty index heap, and it falls-back to LPT scanning if the heaps are empty ++ * or do not have an LEB which satisfies the @min_space criteria. ++ * ++ * Note, LEBs which have less than dead watermark of free + dirty space are ++ * never picked by this function. ++ * ++ * The additional @pick_free argument controls if this function has to return a ++ * free or freeable LEB if one is present. For example, GC must set it to %1, ++ * when called from the journal space reservation function, because the ++ * appearance of free space may coincide with the loss of enough dirty space ++ * for GC to succeed anyway. ++ * ++ * In contrast, if the Garbage Collector is called from budgeting, it should ++ * just make free space, not return LEBs which are already free or freeable. ++ * ++ * In addition @pick_free is set to %2 by the recovery process in order to ++ * recover gc_lnum in which case an index LEB must not be returned. ++ * ++ * This function returns zero and the LEB properties of found dirty LEB in case ++ * of success, %-ENOSPC if no dirty LEB was found and a negative error code in ++ * case of other failures. The returned LEB is marked as "taken".
++ */ ++int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, ++ int min_space, int pick_free) ++{ ++ int err = 0, sum, exclude_index = pick_free == 2 ? 1 : 0; ++ const struct ubifs_lprops *lp = NULL, *idx_lp = NULL; ++ struct ubifs_lpt_heap *heap, *idx_heap; ++ ++ ubifs_get_lprops(c); ++ ++ if (pick_free) { ++ int lebs, rsvd_idx_lebs = 0; ++ ++ spin_lock(&c->space_lock); ++ lebs = c->lst.empty_lebs + c->idx_gc_cnt; ++ lebs += c->freeable_cnt - c->lst.taken_empty_lebs; ++ ++ /* ++ * Note, the index may consume more LEBs than have been reserved ++ * for it. It is OK because it might be consolidated by GC. ++ * But if the index takes fewer LEBs than it is reserved for it, ++ * this function must avoid picking those reserved LEBs. ++ */ ++ if (c->min_idx_lebs >= c->lst.idx_lebs) { ++ rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; ++ exclude_index = 1; ++ } ++ spin_unlock(&c->space_lock); ++ ++ /* Check if there are enough free LEBs for the index */ ++ if (rsvd_idx_lebs < lebs) { ++ /* OK, try to find an empty LEB */ ++ lp = ubifs_fast_find_empty(c); ++ if (lp) ++ goto found; ++ ++ /* Or a freeable LEB */ ++ lp = ubifs_fast_find_freeable(c); ++ if (lp) ++ goto found; ++ } else ++ /* ++ * We cannot pick free/freeable LEBs in the below code. ++ */ ++ pick_free = 0; ++ } else { ++ spin_lock(&c->space_lock); ++ exclude_index = (c->min_idx_lebs >= c->lst.idx_lebs); ++ spin_unlock(&c->space_lock); ++ } ++ ++ /* Look on the dirty and dirty index heaps */ ++ heap = &c->lpt_heap[LPROPS_DIRTY - 1]; ++ idx_heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1]; ++ ++ if (idx_heap->cnt && !exclude_index) { ++ idx_lp = idx_heap->arr[0]; ++ sum = idx_lp->free + idx_lp->dirty; ++ /* ++ * Since we reserve thrice as much space for the index than it ++ * actually takes, it does not make sense to pick indexing LEBs ++ * with less than, say, half LEB of dirty space. May be half is ++ * not the optimal boundary - this should be tested and ++ * checked. 
This boundary should determine how much we use ++ * in-the-gaps to consolidate the index comparing to how much ++ * we use garbage collector to consolidate it. The "half" ++ * criteria just feels to be fine. ++ */ ++ if (sum < min_space || sum < c->half_leb_size) ++ idx_lp = NULL; ++ } ++ ++ if (heap->cnt) { ++ lp = heap->arr[0]; ++ if (lp->dirty + lp->free < min_space) ++ lp = NULL; ++ } ++ ++ /* Pick the LEB with most space */ ++ if (idx_lp && lp) { ++ if (idx_lp->free + idx_lp->dirty >= lp->free + lp->dirty) ++ lp = idx_lp; ++ } else if (idx_lp && !lp) ++ lp = idx_lp; ++ ++ if (lp) { ++ ubifs_assert(lp->free + lp->dirty >= c->dead_wm); ++ goto found; ++ } ++ ++ /* Did not find a dirty LEB on the dirty heaps, have to scan */ ++ dbg_find("scanning LPT for a dirty LEB"); ++ lp = scan_for_dirty(c, min_space, pick_free, exclude_index); ++ if (IS_ERR(lp)) { ++ err = PTR_ERR(lp); ++ goto out; ++ } ++ ubifs_assert(lp->dirty >= c->dead_wm || ++ (pick_free && lp->free + lp->dirty == c->leb_size)); ++ ++found: ++ dbg_find("found LEB %d, free %d, dirty %d, flags %#x", ++ lp->lnum, lp->free, lp->dirty, lp->flags); ++ ++ lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC, ++ lp->flags | LPROPS_TAKEN, 0); ++ if (IS_ERR(lp)) { ++ err = PTR_ERR(lp); ++ goto out; ++ } ++ ++ memcpy(ret_lp, lp, sizeof(struct ubifs_lprops)); ++ ++out: ++ ubifs_release_lprops(c); ++ return err; ++} ++ ++/** ++ * scan_for_free_cb - free space scan callback. ++ * @c: the UBIFS file-system description object ++ * @lprops: LEB properties to scan ++ * @in_tree: whether the LEB properties are in main memory ++ * @data: information passed to and from the caller of the scan ++ * ++ * This function returns a code that indicates whether the scan should continue ++ * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree ++ * in main memory (%LPT_SCAN_ADD), or whether the scan should stop ++ * (%LPT_SCAN_STOP). 
++ */ ++static int scan_for_free_cb(struct ubifs_info *c, ++ const struct ubifs_lprops *lprops, int in_tree, ++ struct scan_data *data) ++{ ++ int ret = LPT_SCAN_CONTINUE; ++ ++ /* Exclude LEBs that are currently in use */ ++ if (lprops->flags & LPROPS_TAKEN) ++ return LPT_SCAN_CONTINUE; ++ /* Determine whether to add these LEB properties to the tree */ ++ if (!in_tree && valuable(c, lprops)) ++ ret |= LPT_SCAN_ADD; ++ /* Exclude index LEBs */ ++ if (lprops->flags & LPROPS_INDEX) ++ return ret; ++ /* Exclude LEBs with too little space */ ++ if (lprops->free < data->min_space) ++ return ret; ++ /* If specified, exclude empty LEBs */ ++ if (!data->pick_free && lprops->free == c->leb_size) ++ return ret; ++ /* ++ * LEBs that have only free and dirty space must not be allocated ++ * because they may have been unmapped already or they may have data ++ * that is obsolete only because of nodes that are still sitting in a ++ * wbuf. ++ */ ++ if (lprops->free + lprops->dirty == c->leb_size && lprops->dirty > 0) ++ return ret; ++ /* Finally we found space */ ++ data->lnum = lprops->lnum; ++ return LPT_SCAN_ADD | LPT_SCAN_STOP; ++} ++ ++/** ++ * do_find_free_space - find a data LEB with free space. ++ * @c: the UBIFS file-system description object ++ * @min_space: minimum amount of free space required ++ * @pick_free: whether it is OK to scan for empty LEBs ++ * @squeeze: whether to try to find space in a non-empty LEB first ++ * ++ * This function returns a pointer to the LEB properties found or a negative ++ * error code. 
++ */ ++static ++const struct ubifs_lprops *do_find_free_space(struct ubifs_info *c, ++ int min_space, int pick_free, ++ int squeeze) ++{ ++ const struct ubifs_lprops *lprops; ++ struct ubifs_lpt_heap *heap; ++ struct scan_data data; ++ int err, i; ++ ++ if (squeeze) { ++ lprops = ubifs_fast_find_free(c); ++ if (lprops && lprops->free >= min_space) ++ return lprops; ++ } ++ if (pick_free) { ++ lprops = ubifs_fast_find_empty(c); ++ if (lprops) ++ return lprops; ++ } ++ if (!squeeze) { ++ lprops = ubifs_fast_find_free(c); ++ if (lprops && lprops->free >= min_space) ++ return lprops; ++ } ++ /* There may be an LEB with enough free space on the dirty heap */ ++ heap = &c->lpt_heap[LPROPS_DIRTY - 1]; ++ for (i = 0; i < heap->cnt; i++) { ++ lprops = heap->arr[i]; ++ if (lprops->free >= min_space) ++ return lprops; ++ } ++ /* ++ * A LEB may have fallen off of the bottom of the free heap, and ended ++ * up as uncategorized even though it has enough free space for us now, ++ * so check the uncategorized list. N.B. neither empty nor freeable LEBs ++ * can end up as uncategorized because they are kept on lists not ++ * finite-sized heaps. 
++ */ ++ list_for_each_entry(lprops, &c->uncat_list, list) { ++ if (lprops->flags & LPROPS_TAKEN) ++ continue; ++ if (lprops->flags & LPROPS_INDEX) ++ continue; ++ if (lprops->free >= min_space) ++ return lprops; ++ } ++ /* We have looked everywhere in main memory, now scan the flash */ ++ if (c->pnodes_have >= c->pnode_cnt) ++ /* All pnodes are in memory, so skip scan */ ++ return ERR_PTR(-ENOSPC); ++ data.min_space = min_space; ++ data.pick_free = pick_free; ++ data.lnum = -1; ++ err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, ++ (ubifs_lpt_scan_callback)scan_for_free_cb, ++ &data); ++ if (err) ++ return ERR_PTR(err); ++ ubifs_assert(data.lnum >= c->main_first && data.lnum < c->leb_cnt); ++ c->lscan_lnum = data.lnum; ++ lprops = ubifs_lpt_lookup_dirty(c, data.lnum); ++ if (IS_ERR(lprops)) ++ return lprops; ++ ubifs_assert(lprops->lnum == data.lnum); ++ ubifs_assert(lprops->free >= min_space); ++ ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); ++ ubifs_assert(!(lprops->flags & LPROPS_INDEX)); ++ return lprops; ++} ++ ++/** ++ * ubifs_find_free_space - find a data LEB with free space. ++ * @c: the UBIFS file-system description object ++ * @min_space: minimum amount of required free space ++ * @offs: contains offset of where free space starts on exit ++ * @squeeze: whether to try to find space in a non-empty LEB first ++ * ++ * This function looks for an LEB with at least @min_space bytes of free space. ++ * It tries to find an empty LEB if possible. If no empty LEBs are available, ++ * this function searches for a non-empty data LEB. The returned LEB is marked ++ * as "taken". ++ * ++ * This function returns found LEB number in case of success, %-ENOSPC if it ++ * failed to find a LEB with @min_space bytes of free space and other a negative ++ * error codes in case of failure. 
++ */ ++int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *offs, ++ int squeeze) ++{ ++ const struct ubifs_lprops *lprops; ++ int lebs, rsvd_idx_lebs, pick_free = 0, err, lnum, flags; ++ ++ dbg_find("min_space %d", min_space); ++ ubifs_get_lprops(c); ++ ++ /* Check if there are enough empty LEBs for commit */ ++ spin_lock(&c->space_lock); ++ if (c->min_idx_lebs > c->lst.idx_lebs) ++ rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; ++ else ++ rsvd_idx_lebs = 0; ++ lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - ++ c->lst.taken_empty_lebs; ++ if (rsvd_idx_lebs < lebs) ++ /* ++ * OK to allocate an empty LEB, but we still don't want to go ++ * looking for one if there aren't any. ++ */ ++ if (c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) { ++ pick_free = 1; ++ /* ++ * Because we release the space lock, we must account ++ * for this allocation here. After the LEB properties ++ * flags have been updated, we subtract one. Note, the ++ * result of this is that lprops also decreases ++ * @taken_empty_lebs in 'ubifs_change_lp()', so it is ++ * off by one for a short period of time which may ++ * introduce a small disturbance to budgeting ++ * calculations, but this is harmless because at the ++ * worst case this would make the budgeting subsystem ++ * be more pessimistic than needed. ++ * ++ * Fundamentally, this is about serialization of the ++ * budgeting and lprops subsystems. We could make the ++ * @space_lock a mutex and avoid dropping it before ++ * calling 'ubifs_change_lp()', but mutex is more ++ * heavy-weight, and we want budgeting to be as fast as ++ * possible. 
++ */ ++ c->lst.taken_empty_lebs += 1; ++ } ++ spin_unlock(&c->space_lock); ++ ++ lprops = do_find_free_space(c, min_space, pick_free, squeeze); ++ if (IS_ERR(lprops)) { ++ err = PTR_ERR(lprops); ++ goto out; ++ } ++ ++ lnum = lprops->lnum; ++ flags = lprops->flags | LPROPS_TAKEN; ++ ++ lprops = ubifs_change_lp(c, lprops, LPROPS_NC, LPROPS_NC, flags, 0); ++ if (IS_ERR(lprops)) { ++ err = PTR_ERR(lprops); ++ goto out; ++ } ++ ++ if (pick_free) { ++ spin_lock(&c->space_lock); ++ c->lst.taken_empty_lebs -= 1; ++ spin_unlock(&c->space_lock); ++ } ++ ++ *offs = c->leb_size - lprops->free; ++ ubifs_release_lprops(c); ++ ++ if (*offs == 0) { ++ /* ++ * Ensure that empty LEBs have been unmapped. They may not have ++ * been, for example, because of an unclean unmount. Also ++ * LEBs that were freeable LEBs (free + dirty == leb_size) will ++ * not have been unmapped. ++ */ ++ err = ubifs_leb_unmap(c, lnum); ++ if (err) ++ return err; ++ } ++ ++ dbg_find("found LEB %d, free %d", lnum, c->leb_size - *offs); ++ ubifs_assert(*offs <= c->leb_size - min_space); ++ return lnum; ++ ++out: ++ if (pick_free) { ++ spin_lock(&c->space_lock); ++ c->lst.taken_empty_lebs -= 1; ++ spin_unlock(&c->space_lock); ++ } ++ ubifs_release_lprops(c); ++ return err; ++} ++ ++/** ++ * scan_for_idx_cb - callback used by the scan for a free LEB for the index. ++ * @c: the UBIFS file-system description object ++ * @lprops: LEB properties to scan ++ * @in_tree: whether the LEB properties are in main memory ++ * @data: information passed to and from the caller of the scan ++ * ++ * This function returns a code that indicates whether the scan should continue ++ * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree ++ * in main memory (%LPT_SCAN_ADD), or whether the scan should stop ++ * (%LPT_SCAN_STOP). 
++ */ ++static int scan_for_idx_cb(struct ubifs_info *c, ++ const struct ubifs_lprops *lprops, int in_tree, ++ struct scan_data *data) ++{ ++ int ret = LPT_SCAN_CONTINUE; ++ ++ /* Exclude LEBs that are currently in use */ ++ if (lprops->flags & LPROPS_TAKEN) ++ return LPT_SCAN_CONTINUE; ++ /* Determine whether to add these LEB properties to the tree */ ++ if (!in_tree && valuable(c, lprops)) ++ ret |= LPT_SCAN_ADD; ++ /* Exclude index LEBS */ ++ if (lprops->flags & LPROPS_INDEX) ++ return ret; ++ /* Exclude LEBs that cannot be made empty */ ++ if (lprops->free + lprops->dirty != c->leb_size) ++ return ret; ++ /* ++ * We are allocating for the index so it is safe to allocate LEBs with ++ * only free and dirty space, because write buffers are sync'd at commit ++ * start. ++ */ ++ data->lnum = lprops->lnum; ++ return LPT_SCAN_ADD | LPT_SCAN_STOP; ++} ++ ++/** ++ * scan_for_leb_for_idx - scan for a free LEB for the index. ++ * @c: the UBIFS file-system description object ++ */ ++static const struct ubifs_lprops *scan_for_leb_for_idx(struct ubifs_info *c) ++{ ++ struct ubifs_lprops *lprops; ++ struct scan_data data; ++ int err; ++ ++ data.lnum = -1; ++ err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, ++ (ubifs_lpt_scan_callback)scan_for_idx_cb, ++ &data); ++ if (err) ++ return ERR_PTR(err); ++ ubifs_assert(data.lnum >= c->main_first && data.lnum < c->leb_cnt); ++ c->lscan_lnum = data.lnum; ++ lprops = ubifs_lpt_lookup_dirty(c, data.lnum); ++ if (IS_ERR(lprops)) ++ return lprops; ++ ubifs_assert(lprops->lnum == data.lnum); ++ ubifs_assert(lprops->free + lprops->dirty == c->leb_size); ++ ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); ++ ubifs_assert(!(lprops->flags & LPROPS_INDEX)); ++ return lprops; ++} ++ ++/** ++ * ubifs_find_free_leb_for_idx - find a free LEB for the index. ++ * @c: the UBIFS file-system description object ++ * ++ * This function looks for a free LEB and returns that LEB number. The returned ++ * LEB is marked as "taken", "index". 
++ * ++ * Only empty LEBs are allocated. This is for two reasons. First, the commit ++ * calculates the number of LEBs to allocate based on the assumption that they ++ * will be empty. Secondly, free space at the end of an index LEB is not ++ * guaranteed to be empty because it may have been used by the in-the-gaps ++ * method prior to an unclean unmount. ++ * ++ * If no LEB is found %-ENOSPC is returned. For other failures another negative ++ * error code is returned. ++ */ ++int ubifs_find_free_leb_for_idx(struct ubifs_info *c) ++{ ++ const struct ubifs_lprops *lprops; ++ int lnum = -1, err, flags; ++ ++ ubifs_get_lprops(c); ++ ++ lprops = ubifs_fast_find_empty(c); ++ if (!lprops) { ++ lprops = ubifs_fast_find_freeable(c); ++ if (!lprops) { ++ ubifs_assert(c->freeable_cnt == 0); ++ if (c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) { ++ lprops = scan_for_leb_for_idx(c); ++ if (IS_ERR(lprops)) { ++ err = PTR_ERR(lprops); ++ goto out; ++ } ++ } ++ } ++ } ++ ++ if (!lprops) { ++ err = -ENOSPC; ++ goto out; ++ } ++ ++ lnum = lprops->lnum; ++ ++ dbg_find("found LEB %d, free %d, dirty %d, flags %#x", ++ lnum, lprops->free, lprops->dirty, lprops->flags); ++ ++ flags = lprops->flags | LPROPS_TAKEN | LPROPS_INDEX; ++ lprops = ubifs_change_lp(c, lprops, c->leb_size, 0, flags, 0); ++ if (IS_ERR(lprops)) { ++ err = PTR_ERR(lprops); ++ goto out; ++ } ++ ++ ubifs_release_lprops(c); ++ ++ /* ++ * Ensure that empty LEBs have been unmapped. They may not have been, ++ * for example, because of an unclean unmount. Also LEBs that were ++ * freeable LEBs (free + dirty == leb_size) will not have been unmapped. 
++ */ ++ err = ubifs_leb_unmap(c, lnum); ++ if (err) { ++ ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0, ++ LPROPS_TAKEN | LPROPS_INDEX, 0); ++ return err; ++ } ++ ++ return lnum; ++ ++out: ++ ubifs_release_lprops(c); ++ return err; ++} ++ ++static int cmp_dirty_idx(const struct ubifs_lprops **a, ++ const struct ubifs_lprops **b) ++{ ++ const struct ubifs_lprops *lpa = *a; ++ const struct ubifs_lprops *lpb = *b; ++ ++ return lpa->dirty + lpa->free - lpb->dirty - lpb->free; ++} ++ ++static void swap_dirty_idx(struct ubifs_lprops **a, struct ubifs_lprops **b, ++ int size) ++{ ++ struct ubifs_lprops *t = *a; ++ ++ *a = *b; ++ *b = t; ++} ++ ++/** ++ * ubifs_save_dirty_idx_lnums - save an array of the most dirty index LEB nos. ++ * @c: the UBIFS file-system description object ++ * ++ * This function is called each commit to create an array of LEB numbers of ++ * dirty index LEBs sorted in order of dirty and free space. This is used by ++ * the in-the-gaps method of TNC commit. ++ */ ++int ubifs_save_dirty_idx_lnums(struct ubifs_info *c) ++{ ++ int i; ++ ++ ubifs_get_lprops(c); ++ /* Copy the LPROPS_DIRTY_IDX heap */ ++ c->dirty_idx.cnt = c->lpt_heap[LPROPS_DIRTY_IDX - 1].cnt; ++ memcpy(c->dirty_idx.arr, c->lpt_heap[LPROPS_DIRTY_IDX - 1].arr, ++ sizeof(void *) * c->dirty_idx.cnt); ++ /* Sort it so that the dirtiest is now at the end */ ++ sort(c->dirty_idx.arr, c->dirty_idx.cnt, sizeof(void *), ++ (int (*)(const void *, const void *))cmp_dirty_idx, ++ (void (*)(void *, void *, int))swap_dirty_idx); ++ dbg_find("found %d dirty index LEBs", c->dirty_idx.cnt); ++ if (c->dirty_idx.cnt) ++ dbg_find("dirtiest index LEB is %d with dirty %d and free %d", ++ c->dirty_idx.arr[c->dirty_idx.cnt - 1]->lnum, ++ c->dirty_idx.arr[c->dirty_idx.cnt - 1]->dirty, ++ c->dirty_idx.arr[c->dirty_idx.cnt - 1]->free); ++ /* Replace the lprops pointers with LEB numbers */ ++ for (i = 0; i < c->dirty_idx.cnt; i++) ++ c->dirty_idx.arr[i] = (void *)(size_t)c->dirty_idx.arr[i]->lnum; ++ 
ubifs_release_lprops(c); ++ return 0; ++} ++ ++/** ++ * scan_dirty_idx_cb - callback used by the scan for a dirty index LEB. ++ * @c: the UBIFS file-system description object ++ * @lprops: LEB properties to scan ++ * @in_tree: whether the LEB properties are in main memory ++ * @data: information passed to and from the caller of the scan ++ * ++ * This function returns a code that indicates whether the scan should continue ++ * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree ++ * in main memory (%LPT_SCAN_ADD), or whether the scan should stop ++ * (%LPT_SCAN_STOP). ++ */ ++static int scan_dirty_idx_cb(struct ubifs_info *c, ++ const struct ubifs_lprops *lprops, int in_tree, ++ struct scan_data *data) ++{ ++ int ret = LPT_SCAN_CONTINUE; ++ ++ /* Exclude LEBs that are currently in use */ ++ if (lprops->flags & LPROPS_TAKEN) ++ return LPT_SCAN_CONTINUE; ++ /* Determine whether to add these LEB properties to the tree */ ++ if (!in_tree && valuable(c, lprops)) ++ ret |= LPT_SCAN_ADD; ++ /* Exclude non-index LEBs */ ++ if (!(lprops->flags & LPROPS_INDEX)) ++ return ret; ++ /* Exclude LEBs with too little space */ ++ if (lprops->free + lprops->dirty < c->min_idx_node_sz) ++ return ret; ++ /* Finally we found space */ ++ data->lnum = lprops->lnum; ++ return LPT_SCAN_ADD | LPT_SCAN_STOP; ++} ++ ++/** ++ * find_dirty_idx_leb - find a dirty index LEB. ++ * @c: the UBIFS file-system description object ++ * ++ * This function returns LEB number upon success and a negative error code upon ++ * failure. In particular, -ENOSPC is returned if a dirty index LEB is not ++ * found. ++ * ++ * Note that this function scans the entire LPT but it is called very rarely. 
++ */ ++static int find_dirty_idx_leb(struct ubifs_info *c) ++{ ++ const struct ubifs_lprops *lprops; ++ struct ubifs_lpt_heap *heap; ++ struct scan_data data; ++ int err, i, ret; ++ ++ /* Check all structures in memory first */ ++ data.lnum = -1; ++ heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1]; ++ for (i = 0; i < heap->cnt; i++) { ++ lprops = heap->arr[i]; ++ ret = scan_dirty_idx_cb(c, lprops, 1, &data); ++ if (ret & LPT_SCAN_STOP) ++ goto found; ++ } ++ list_for_each_entry(lprops, &c->frdi_idx_list, list) { ++ ret = scan_dirty_idx_cb(c, lprops, 1, &data); ++ if (ret & LPT_SCAN_STOP) ++ goto found; ++ } ++ list_for_each_entry(lprops, &c->uncat_list, list) { ++ ret = scan_dirty_idx_cb(c, lprops, 1, &data); ++ if (ret & LPT_SCAN_STOP) ++ goto found; ++ } ++ if (c->pnodes_have >= c->pnode_cnt) ++ /* All pnodes are in memory, so skip scan */ ++ return -ENOSPC; ++ err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum, ++ (ubifs_lpt_scan_callback)scan_dirty_idx_cb, ++ &data); ++ if (err) ++ return err; ++found: ++ ubifs_assert(data.lnum >= c->main_first && data.lnum < c->leb_cnt); ++ c->lscan_lnum = data.lnum; ++ lprops = ubifs_lpt_lookup_dirty(c, data.lnum); ++ if (IS_ERR(lprops)) ++ return PTR_ERR(lprops); ++ ubifs_assert(lprops->lnum == data.lnum); ++ ubifs_assert(lprops->free + lprops->dirty >= c->min_idx_node_sz); ++ ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); ++ ubifs_assert((lprops->flags & LPROPS_INDEX)); ++ ++ dbg_find("found dirty LEB %d, free %d, dirty %d, flags %#x", ++ lprops->lnum, lprops->free, lprops->dirty, lprops->flags); ++ ++ lprops = ubifs_change_lp(c, lprops, LPROPS_NC, LPROPS_NC, ++ lprops->flags | LPROPS_TAKEN, 0); ++ if (IS_ERR(lprops)) ++ return PTR_ERR(lprops); ++ ++ return lprops->lnum; ++} ++ ++/** ++ * get_idx_gc_leb - try to get a LEB number from trivial GC. 
++ * @c: the UBIFS file-system description object ++ */ ++static int get_idx_gc_leb(struct ubifs_info *c) ++{ ++ const struct ubifs_lprops *lp; ++ int err, lnum; ++ ++ err = ubifs_get_idx_gc_leb(c); ++ if (err < 0) ++ return err; ++ lnum = err; ++ /* ++ * The LEB was due to be unmapped after the commit but ++ * it is needed now for this commit. ++ */ ++ lp = ubifs_lpt_lookup_dirty(c, lnum); ++ if (IS_ERR(lp)) ++ return PTR_ERR(lp); ++ lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC, ++ lp->flags | LPROPS_INDEX, -1); ++ if (IS_ERR(lp)) ++ return PTR_ERR(lp); ++ dbg_find("LEB %d, dirty %d and free %d flags %#x", ++ lp->lnum, lp->dirty, lp->free, lp->flags); ++ return lnum; ++} ++ ++/** ++ * find_dirtiest_idx_leb - find dirtiest index LEB from dirtiest array. ++ * @c: the UBIFS file-system description object ++ */ ++static int find_dirtiest_idx_leb(struct ubifs_info *c) ++{ ++ const struct ubifs_lprops *lp; ++ int lnum; ++ ++ while (1) { ++ if (!c->dirty_idx.cnt) ++ return -ENOSPC; ++ /* The lprops pointers were replaced by LEB numbers */ ++ lnum = (size_t)c->dirty_idx.arr[--c->dirty_idx.cnt]; ++ lp = ubifs_lpt_lookup(c, lnum); ++ if (IS_ERR(lp)) ++ return PTR_ERR(lp); ++ if ((lp->flags & LPROPS_TAKEN) || !(lp->flags & LPROPS_INDEX)) ++ continue; ++ lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC, ++ lp->flags | LPROPS_TAKEN, 0); ++ if (IS_ERR(lp)) ++ return PTR_ERR(lp); ++ break; ++ } ++ dbg_find("LEB %d, dirty %d and free %d flags %#x", lp->lnum, lp->dirty, ++ lp->free, lp->flags); ++ ubifs_assert(lp->flags & LPROPS_TAKEN); ++ ubifs_assert(lp->flags & LPROPS_INDEX); ++ return lnum; ++} ++ ++/** ++ * ubifs_find_dirty_idx_leb - try to find dirtiest index LEB as at last commit. ++ * @c: the UBIFS file-system description object ++ * ++ * This function attempts to find an untaken index LEB with the most free and ++ * dirty space that can be used without overwriting index nodes that were in the ++ * last index committed.
++ */ ++int ubifs_find_dirty_idx_leb(struct ubifs_info *c) ++{ ++ int err; ++ ++ ubifs_get_lprops(c); ++ ++ /* ++ * We made an array of the dirtiest index LEB numbers as at the start of ++ * last commit. Try that array first. ++ */ ++ err = find_dirtiest_idx_leb(c); ++ ++ /* Next try scanning the entire LPT */ ++ if (err == -ENOSPC) ++ err = find_dirty_idx_leb(c); ++ ++ /* Finally take any index LEBs awaiting trivial GC */ ++ if (err == -ENOSPC) ++ err = get_idx_gc_leb(c); ++ ++ ubifs_release_lprops(c); ++ return err; ++} +diff -Nurd linux-2.6.24/fs/ubifs/gc.c ubifs-v2.6.24/fs/ubifs/gc.c +--- linux-2.6.24/fs/ubifs/gc.c 1970-01-01 02:00:00.000000000 +0200 ++++ ubifs-v2.6.24/fs/ubifs/gc.c 2009-04-07 17:14:47.000000000 +0200 +@@ -0,0 +1,1033 @@ ++/* ++ * This file is part of UBIFS. ++ * ++ * Copyright (C) 2006-2008 Nokia Corporation. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published by ++ * the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, write to the Free Software Foundation, Inc., 51 ++ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ * ++ * Authors: Adrian Hunter ++ * Artem Bityutskiy (Битюцкий Артём) ++ */ ++ ++/* ++ * This file implements garbage collection. The procedure for garbage collection ++ * is different depending on whether a LEB as an index LEB (contains index ++ * nodes) or not. 
For non-index LEBs, garbage collection finds a LEB which ++ * contains a lot of dirty space (obsolete nodes), and copies the non-obsolete ++ * nodes to the journal, at which point the garbage-collected LEB is free to be ++ * reused. For index LEBs, garbage collection marks the non-obsolete index nodes ++ * dirty in the TNC, and after the next commit, the garbage-collected LEB is ++ * free to be reused. Garbage collection will cause the number of dirty index ++ * nodes to grow, however sufficient space is reserved for the index to ensure ++ * the commit will never run out of space. ++ * ++ * Notes about dead watermark. In the current UBIFS implementation we assume ++ * that LEBs which have less than @c->dead_wm bytes of free + dirty space are ++ * full and not worth garbage-collecting. The dead watermark is one min. I/O ++ * unit size, or min. UBIFS node size, whichever is greater. Indeed, the UBIFS ++ * Garbage Collector has to synchronize the GC head's write buffer before ++ * returning, so this is about wasting one min. I/O unit. However, UBIFS GC can ++ * actually reclaim even very small pieces of dirty space by garbage collecting ++ * enough dirty LEBs, but we do not bother doing this in this implementation. ++ * ++ * Notes about dark watermark. The result of GC work depends on how big the ++ * UBIFS nodes GC deals with are. Large nodes make GC waste more space. Indeed, ++ * if GC moves data from LEB A to LEB B and nodes in LEB A are large, GC would ++ * have to waste large pieces of free space at the end of LEB B, because nodes ++ * from LEB A would not fit. And the worst situation is when all nodes are of ++ * maximum size. So dark watermark is the amount of free + dirty space in LEB ++ * which is guaranteed to be reclaimable. If LEB has less space, the GC might ++ * be unable to reclaim it. So, LEBs with free + dirty greater than dark ++ * watermark are "good" LEBs from GC's point of view.
The other LEBs are not so ++ * good, and GC takes extra care when moving them. ++ */ ++ ++#include <linux/pagemap.h> ++#include "ubifs.h" ++ ++/* ++ * GC may need to move more than one LEB to make progress. The below constants ++ * define "soft" and "hard" limits on the number of LEBs the garbage collector ++ * may move. ++ */ ++#define SOFT_LEBS_LIMIT 4 ++#define HARD_LEBS_LIMIT 32 ++ ++/** ++ * switch_gc_head - switch the garbage collection journal head. ++ * @c: UBIFS file-system description object ++ * ++ * This function switches the GC head to the next LEB which is reserved in ++ * @c->gc_lnum: it synchronizes the GC write-buffer, unmaps the reserved LEB, ++ * adds it to the log as a GC head bud, resets @c->gc_lnum to %-1 and seeks the ++ * GC write-buffer to the beginning of that LEB. Only "nolock" write-buffer ++ * helpers are used here, so presumably the caller holds the write-buffer lock. ++ * Returns %0 in case of success, %-EAGAIN if commit is required, and other ++ * negative error codes in case of failures. ++ */ ++static int switch_gc_head(struct ubifs_info *c) ++{ ++ int err, gc_lnum = c->gc_lnum; ++ struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; ++ ++ ubifs_assert(gc_lnum != -1); ++ dbg_gc("switch GC head from LEB %d:%d to LEB %d (waste %d bytes)", ++ wbuf->lnum, wbuf->offs + wbuf->used, gc_lnum, ++ c->leb_size - wbuf->offs - wbuf->used); ++ ++ err = ubifs_wbuf_sync_nolock(wbuf); ++ if (err) ++ return err; ++ ++ /* ++ * The GC write-buffer was synchronized, we may safely unmap ++ * 'c->gc_lnum'. ++ */ ++ err = ubifs_leb_unmap(c, gc_lnum); ++ if (err) ++ return err; ++ ++ err = ubifs_add_bud_to_log(c, GCHD, gc_lnum, 0); ++ if (err) ++ return err; ++ ++ c->gc_lnum = -1; ++ err = ubifs_wbuf_seek_nolock(wbuf, gc_lnum, 0, UBI_LONGTERM); ++ return err; ++} ++ ++/** ++ * list_sort - sort a list. ++ * @priv: private data, passed to @cmp ++ * @head: the list to sort ++ * @cmp: the elements comparison function ++ * ++ * This function has been implemented by Mark J Roberts <mjr@znex.org>. It ++ * implements "merge sort" which has O(nlog(n)) complexity. The list is sorted ++ * in ascending order.
++ * ++ * The comparison function @cmp is supposed to return a negative value if @a is ++ * less than @b, and a positive value if @a is greater than @b. If @a and @b are ++ * equivalent, then it does not matter what this function returns. ++ */ ++static void list_sort(void *priv, struct list_head *head, ++ int (*cmp)(void *priv, struct list_head *a, ++ struct list_head *b)) ++{ ++ struct list_head *p, *q, *e, *list, *tail, *oldhead; ++ int insize, nmerges, psize, qsize, i; ++ ++ if (list_empty(head)) ++ return; ++ ++ list = head->next; ++ list_del(head); ++ insize = 1; ++ for (;;) { ++ p = oldhead = list; ++ list = tail = NULL; ++ nmerges = 0; ++ ++ while (p) { ++ nmerges++; ++ q = p; ++ psize = 0; ++ for (i = 0; i < insize; i++) { ++ psize++; ++ q = q->next == oldhead ? NULL : q->next; ++ if (!q) ++ break; ++ } ++ ++ qsize = insize; ++ while (psize > 0 || (qsize > 0 && q)) { ++ if (!psize) { ++ e = q; ++ q = q->next; ++ qsize--; ++ if (q == oldhead) ++ q = NULL; ++ } else if (!qsize || !q) { ++ e = p; ++ p = p->next; ++ psize--; ++ if (p == oldhead) ++ p = NULL; ++ } else if (cmp(priv, p, q) <= 0) { ++ e = p; ++ p = p->next; ++ psize--; ++ if (p == oldhead) ++ p = NULL; ++ } else { ++ e = q; ++ q = q->next; ++ qsize--; ++ if (q == oldhead) ++ q = NULL; ++ } ++ if (tail) ++ tail->next = e; ++ else ++ list = e; ++ e->prev = tail; ++ tail = e; ++ } ++ p = q; ++ } ++ ++ tail->next = list; ++ list->prev = tail; ++ ++ if (nmerges <= 1) ++ break; ++ ++ insize *= 2; ++ } ++ ++ head->next = list; ++ head->prev = list->prev; ++ list->prev->next = head; ++ list->prev = head; ++} ++ ++/** ++ * data_nodes_cmp - compare 2 data nodes. ++ * @priv: UBIFS file-system description object ++ * @a: first data node ++ * @b: second data node ++ * ++ * This function compares data nodes @a and @b. Returns %1 if @a has greater ++ * inode or block number, and %-1 otherwise.
++ */ ++int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) ++{ ++ ino_t inuma, inumb; ++ struct ubifs_info *c = priv; ++ struct ubifs_scan_node *sa, *sb; ++ ++ cond_resched(); ++ sa = list_entry(a, struct ubifs_scan_node, list); ++ sb = list_entry(b, struct ubifs_scan_node, list); ++ ubifs_assert(key_type(c, &sa->key) == UBIFS_DATA_KEY); ++ ubifs_assert(key_type(c, &sb->key) == UBIFS_DATA_KEY); ++ ++ inuma = key_inum(c, &sa->key); ++ inumb = key_inum(c, &sb->key); ++ ++ if (inuma == inumb) { ++ unsigned int blka = key_block(c, &sa->key); ++ unsigned int blkb = key_block(c, &sb->key); ++ ++ if (blka <= blkb) ++ return -1; ++ } else if (inuma <= inumb) ++ return -1; ++ ++ return 1; ++} ++ ++/** ++ * nondata_nodes_cmp - compare 2 non-data nodes. ++ * @priv: UBIFS file-system description object ++ * @a: first node ++ * @b: second node ++ * ++ * This function compares nodes @a and @b. It makes sure that inode nodes go ++ * first and sorted by length in descending order. Directory entry nodes go ++ * after inode nodes and are sorted in ascending hash value order.
++ */ ++int nondata_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) ++{ ++ int typea, typeb; ++ ino_t inuma, inumb; ++ struct ubifs_info *c = priv; ++ struct ubifs_scan_node *sa, *sb; ++ ++ cond_resched(); ++ sa = list_entry(a, struct ubifs_scan_node, list); ++ sb = list_entry(b, struct ubifs_scan_node, list); ++ typea = key_type(c, &sa->key); ++ typeb = key_type(c, &sb->key); ++ ubifs_assert(typea != UBIFS_DATA_KEY && typeb != UBIFS_DATA_KEY); ++ ++ /* Inodes go before directory entries */ ++ if (typea == UBIFS_INO_KEY) { ++ if (typeb == UBIFS_INO_KEY) ++ return sb->len - sa->len; ++ return -1; ++ } ++ if (typeb == UBIFS_INO_KEY) ++ return 1; ++ ++ ubifs_assert(typea == UBIFS_DENT_KEY && typeb == UBIFS_DENT_KEY); ++ inuma = key_inum(c, &sa->key); ++ inumb = key_inum(c, &sb->key); ++ ++ if (inuma == inumb) { ++ uint32_t hasha = key_hash(c, &sa->key); ++ uint32_t hashb = key_hash(c, &sb->key); ++ ++ if (hasha <= hashb) ++ return -1; ++ } else if (inuma <= inumb) ++ return -1; ++ ++ return 1; ++} ++ ++/** ++ * sort_nodes - sort nodes for GC. ++ * @c: UBIFS file-system description object ++ * @sleb: describes nodes to sort and contains the result on exit ++ * @nondata: contains non-data nodes on exit ++ * @min: minimum node size is returned here ++ * ++ * This function sorts the list of nodes to garbage collect. First of all, it ++ * kills obsolete nodes and separates data and non-data nodes to the ++ * @sleb->nodes and @nondata lists correspondingly. ++ * ++ * Data nodes are then sorted in block number order - this is important for ++ * bulk-read; data nodes with lower inode number go before data nodes with ++ * higher inode number, and data nodes with lower block number go before data ++ * nodes with higher block number; ++ * ++ * Non-data nodes are sorted as follows. ++ * o First go inode nodes - they are sorted in descending length order.
++ * o Then go directory entry nodes - they are sorted in hash order, which ++ * should supposedly optimize 'readdir()'. Direntry nodes with lower parent ++ * inode number go before direntry nodes with higher parent inode number, ++ * and direntry nodes with lower name hash values go before direntry nodes ++ * with higher name hash values. ++ * ++ * This function returns zero in case of success and a negative error code in ++ * case of failure. ++ */ ++static int sort_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb, ++ struct list_head *nondata, int *min) ++{ ++ struct ubifs_scan_node *snod, *tmp; ++ ++ *min = INT_MAX; ++ ++ /* Separate data nodes and non-data nodes */ ++ list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) { ++ int err; ++ ++ ubifs_assert(snod->type != UBIFS_IDX_NODE); ++ ubifs_assert(snod->type != UBIFS_REF_NODE); ++ ubifs_assert(snod->type != UBIFS_CS_NODE); ++ ++ err = ubifs_tnc_has_node(c, &snod->key, 0, sleb->lnum, ++ snod->offs, 0); ++ if (err < 0) ++ return err; ++ ++ if (!err) { ++ /* The node is obsolete, remove it from the list */ ++ list_del(&snod->list); ++ kfree(snod); ++ continue; ++ } ++ ++ if (snod->len < *min) ++ *min = snod->len; ++ ++ if (key_type(c, &snod->key) != UBIFS_DATA_KEY) ++ list_move_tail(&snod->list, nondata); ++ } ++ ++ /* Sort data and non-data nodes */ ++ list_sort(c, &sleb->nodes, &data_nodes_cmp); ++ list_sort(c, nondata, &nondata_nodes_cmp); ++ return 0; ++} ++ ++/** ++ * move_node - move a node. ++ * @c: UBIFS file-system description object ++ * @sleb: describes the LEB to move nodes from ++ * @snod: the node to move ++ * @wbuf: write-buffer to move node to ++ * ++ * This function moves node @snod to @wbuf, changes TNC correspondingly, and ++ * destroys @snod. Returns zero in case of success and a negative error code in ++ * case of failure.
++ */ ++static int move_node(struct ubifs_info *c, struct ubifs_scan_leb *sleb, ++ struct ubifs_scan_node *snod, struct ubifs_wbuf *wbuf) ++{ ++ int err, new_lnum = wbuf->lnum, new_offs = wbuf->offs + wbuf->used; ++ ++ cond_resched(); ++ err = ubifs_wbuf_write_nolock(wbuf, snod->node, snod->len); ++ if (err) ++ return err; ++ ++ err = ubifs_tnc_replace(c, &snod->key, sleb->lnum, ++ snod->offs, new_lnum, new_offs, ++ snod->len); ++ list_del(&snod->list); ++ kfree(snod); ++ return err; ++} ++ ++/** ++ * move_nodes - move nodes. ++ * @c: UBIFS file-system description object ++ * @sleb: describes the LEB to move nodes from ++ * ++ * This function moves valid nodes from data LEB described by @sleb to the GC ++ * journal head. This function returns zero in case of success, %-EAGAIN if ++ * commit is required, and other negative error codes in case of other ++ * failures. ++ */ ++static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb) ++{ ++ int err, min; ++ LIST_HEAD(nondata); ++ struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; ++ ++ if (wbuf->lnum == -1) { ++ /* ++ * The GC journal head is not set, because it is the first GC ++ * invocation since mount. ++ */ ++ err = switch_gc_head(c); ++ if (err) ++ return err; ++ } ++ ++ err = sort_nodes(c, sleb, &nondata, &min); ++ if (err) ++ goto out; ++ ++ /* Write nodes to their new location. Use the first-fit strategy */ ++ while (1) { ++ int avail; ++ struct ubifs_scan_node *snod, *tmp; ++ ++ /* Move data nodes */ ++ list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) { ++ avail = c->leb_size - wbuf->offs - wbuf->used; ++ if (snod->len > avail) ++ /* ++ * Do not skip data nodes in order to optimize ++ * bulk-read. 
++ */ ++ break; ++ ++ err = move_node(c, sleb, snod, wbuf); ++ if (err) ++ goto out; ++ } ++ ++ /* Move non-data nodes */ ++ list_for_each_entry_safe(snod, tmp, &nondata, list) { ++ avail = c->leb_size - wbuf->offs - wbuf->used; ++ if (avail < min) ++ break; ++ ++ if (snod->len > avail) { ++ /* ++ * Keep going only if this is an inode with ++ * some data. Otherwise stop and switch the GC ++ * head. IOW, we assume that data-less inode ++ * nodes and direntry nodes are roughly of the ++ * same size. ++ */ ++ if (key_type(c, &snod->key) == UBIFS_DENT_KEY || ++ snod->len == UBIFS_INO_NODE_SZ) ++ break; ++ continue; ++ } ++ ++ err = move_node(c, sleb, snod, wbuf); ++ if (err) ++ goto out; ++ } ++ ++ if (list_empty(&sleb->nodes) && list_empty(&nondata)) ++ break; ++ ++ /* ++ * Waste the rest of the space in the LEB and switch to the ++ * next LEB. ++ */ ++ err = switch_gc_head(c); ++ if (err) ++ goto out; ++ } ++ ++ return 0; ++ ++out: ++ list_splice_tail(&nondata, &sleb->nodes); ++ return err; ++} ++ ++/** ++ * gc_sync_wbufs - sync write-buffers for GC. ++ * @c: UBIFS file-system description object ++ * ++ * We must guarantee that obsoleting nodes are on flash. Unfortunately they may ++ * be in a write-buffer instead. That is, a node could be written to a ++ * write-buffer, obsoleting another node in a LEB that is GC'd. If that LEB is ++ * erased before the write-buffer is sync'd and then there is an unclean ++ * unmount, then an existing node is lost. To avoid this, we sync all ++ * write-buffers. ++ * ++ * This function returns %0 on success or a negative error code on failure. ++ */ ++static int gc_sync_wbufs(struct ubifs_info *c) ++{ ++ int err, i; ++ ++ for (i = 0; i < c->jhead_cnt; i++) { ++ if (i == GCHD) ++ continue; ++ err = ubifs_wbuf_sync(&c->jheads[i].wbuf); ++ if (err) ++ return err; ++ } ++ return 0; ++} ++ ++/** ++ * ubifs_garbage_collect_leb - garbage-collect a logical eraseblock. 
++ * @c: UBIFS file-system description object ++ * @lp: describes the LEB to garbage collect ++ * ++ * This function garbage-collects an LEB and returns one of the @LEB_FREED, ++ * @LEB_RETAINED, etc positive codes in case of success, %-EAGAIN if commit is ++ * required, and other negative error codes in case of failures. ++ */ ++int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp) ++{ ++ struct ubifs_scan_leb *sleb; ++ struct ubifs_scan_node *snod; ++ struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; ++ int err = 0, lnum = lp->lnum; ++ ++ ubifs_assert(c->gc_lnum != -1 || wbuf->offs + wbuf->used == 0 || ++ c->need_recovery); ++ ubifs_assert(c->gc_lnum != lnum); ++ ubifs_assert(wbuf->lnum != lnum); ++ ++ /* ++ * We scan the entire LEB even though we only really need to scan up to ++ * (c->leb_size - lp->free). ++ */ ++ sleb = ubifs_scan(c, lnum, 0, c->sbuf); ++ if (IS_ERR(sleb)) ++ return PTR_ERR(sleb); ++ ++ ubifs_assert(!list_empty(&sleb->nodes)); ++ snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list); ++ ++ if (snod->type == UBIFS_IDX_NODE) { ++ struct ubifs_gced_idx_leb *idx_gc; ++ ++ dbg_gc("indexing LEB %d (free %d, dirty %d)", ++ lnum, lp->free, lp->dirty); ++ list_for_each_entry(snod, &sleb->nodes, list) { ++ struct ubifs_idx_node *idx = snod->node; ++ int level = le16_to_cpu(idx->level); ++ ++ ubifs_assert(snod->type == UBIFS_IDX_NODE); ++ key_read(c, ubifs_idx_key(c, idx), &snod->key); ++ err = ubifs_dirty_idx_node(c, &snod->key, level, lnum, ++ snod->offs); ++ if (err) ++ goto out; ++ } ++ ++ idx_gc = kmalloc(sizeof(struct ubifs_gced_idx_leb), GFP_NOFS); ++ if (!idx_gc) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ idx_gc->lnum = lnum; ++ idx_gc->unmap = 0; ++ list_add(&idx_gc->list, &c->idx_gc); ++ ++ /* ++ * Don't release the LEB until after the next commit, because ++ * it may contain data which is needed for recovery. So ++ * although we freed this LEB, it will become usable only after ++ * the commit. 
++ */ ++ err = ubifs_change_one_lp(c, lnum, c->leb_size, 0, 0, ++ LPROPS_INDEX, 1); ++ if (err) ++ goto out; ++ err = LEB_FREED_IDX; ++ } else { ++ dbg_gc("data LEB %d (free %d, dirty %d)", ++ lnum, lp->free, lp->dirty); ++ ++ err = move_nodes(c, sleb); ++ if (err) ++ goto out_inc_seq; ++ ++ err = gc_sync_wbufs(c); ++ if (err) ++ goto out_inc_seq; ++ ++ err = ubifs_change_one_lp(c, lnum, c->leb_size, 0, 0, 0, 0); ++ if (err) ++ goto out_inc_seq; ++ ++ /* Allow for races with TNC */ ++ c->gced_lnum = lnum; ++ smp_wmb(); ++ c->gc_seq += 1; ++ smp_wmb(); ++ ++ if (c->gc_lnum == -1) { ++ c->gc_lnum = lnum; ++ err = LEB_RETAINED; ++ } else { ++ err = ubifs_wbuf_sync_nolock(wbuf); ++ if (err) ++ goto out; ++ ++ err = ubifs_leb_unmap(c, lnum); ++ if (err) ++ goto out; ++ ++ err = LEB_FREED; ++ } ++ } ++ ++out: ++ ubifs_scan_destroy(sleb); ++ return err; ++ ++out_inc_seq: ++ /* We may have moved at least some nodes so allow for races with TNC */ ++ c->gced_lnum = lnum; ++ smp_wmb(); ++ c->gc_seq += 1; ++ smp_wmb(); ++ goto out; ++} ++ ++/** ++ * ubifs_garbage_collect - UBIFS garbage collector. ++ * @c: UBIFS file-system description object ++ * @anyway: do GC even if there are free LEBs ++ * ++ * This function does out-of-place garbage collection. The return codes are: ++ * o positive LEB number if the LEB has been freed and may be used; ++ * o %-EAGAIN if the caller has to run commit; ++ * o %-ENOSPC if GC failed to make any progress; ++ * o other negative error codes in case of other errors. ++ * ++ * Garbage collector writes data to the journal when GC'ing data LEBs, and just ++ * marking indexing nodes dirty when GC'ing indexing LEBs. Thus, at some point ++ * commit may be required. But commit cannot be run from inside GC, because the ++ * caller might be holding the commit lock, so %-EAGAIN is returned instead; ++ * And this error code means that the caller has to run commit, and re-run GC ++ * if there is still no free space. 
++ * ++ * There are many reasons why this function may return %-EAGAIN: ++ * o the log is full and there is no space to write an LEB reference for ++ * @c->gc_lnum; ++ * o the journal is too large and exceeds size limitations; ++ * o GC moved indexing LEBs, but they can be used only after the commit; ++ * o the shrinker fails to find clean znodes to free and requests the commit; ++ * o etc. ++ * ++ * Note, if the file-system is close to being full, this function may return ++ * %-EAGAIN infinitely, so the caller has to limit amount of re-invocations of ++ * the function. E.g., this happens if the limits on the journal size are too ++ * tough and GC writes too much to the journal before an LEB is freed. This ++ * might also mean that the journal is too large, and the TNC becomes too big, ++ * so that the shrinker is constantly called, finds no clean znodes to free, ++ * and requests commit. Well, this may also happen if the journal is all right, ++ * but another kernel process consumes too much memory. Anyway, infinite ++ * %-EAGAIN may happen, but in some extreme/misconfiguration cases. ++ */ ++int ubifs_garbage_collect(struct ubifs_info *c, int anyway) ++{ ++ int i, err, ret, min_space = c->dead_wm; ++ struct ubifs_lprops lp; ++ struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; ++ ++ ubifs_assert_cmt_locked(c); ++ ++ if (ubifs_gc_should_commit(c)) ++ return -EAGAIN; ++ ++ mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); ++ ++ if (c->ro_media) { ++ ret = -EROFS; ++ goto out_unlock; ++ } ++ ++ /* We expect the write-buffer to be empty on entry */ ++ ubifs_assert(!wbuf->used); ++ ++ for (i = 0; ; i++) { ++ int space_before = c->leb_size - wbuf->offs - wbuf->used; ++ int space_after; ++ ++ cond_resched(); ++ ++ /* Give the commit an opportunity to run */ ++ if (ubifs_gc_should_commit(c)) { ++ ret = -EAGAIN; ++ break; ++ } ++ ++ if (i > SOFT_LEBS_LIMIT && !list_empty(&c->idx_gc)) { ++ /* ++ * We've done enough iterations.
Indexing LEBs were ++ * moved and will be available after the commit. ++ */ ++ dbg_gc("soft limit, some index LEBs GC'ed, -EAGAIN"); ++ ubifs_commit_required(c); ++ ret = -EAGAIN; ++ break; ++ } ++ ++ if (i > HARD_LEBS_LIMIT) { ++ /* ++ * We've moved too many LEBs and have not made ++ * progress, give up. ++ */ ++ dbg_gc("hard limit, -ENOSPC"); ++ ret = -ENOSPC; ++ break; ++ } ++ ++ /* ++ * Empty and freeable LEBs can turn up while we waited for ++ * the wbuf lock, or while we have been running GC. In that ++ * case, we should just return one of those instead of ++ * continuing to GC dirty LEBs. Hence we request ++ * 'ubifs_find_dirty_leb()' to return an empty LEB if it can. ++ */ ++ ret = ubifs_find_dirty_leb(c, &lp, min_space, anyway ? 0 : 1); ++ if (ret) { ++ if (ret == -ENOSPC) ++ dbg_gc("no more dirty LEBs"); ++ break; ++ } ++ ++ dbg_gc("found LEB %d: free %d, dirty %d, sum %d " ++ "(min. space %d)", lp.lnum, lp.free, lp.dirty, ++ lp.free + lp.dirty, min_space); ++ ++ if (lp.free + lp.dirty == c->leb_size) { ++ /* An empty LEB was returned */ ++ dbg_gc("LEB %d is free, return it", lp.lnum); ++ /* ++ * ubifs_find_dirty_leb() doesn't return freeable index ++ * LEBs. ++ */ ++ ubifs_assert(!(lp.flags & LPROPS_INDEX)); ++ if (lp.free != c->leb_size) { ++ /* ++ * Write buffers must be sync'd before ++ * unmapping freeable LEBs, because one of them ++ * may contain data which obsoletes something ++ * in 'lp.lnum'. ++ */ ++ ret = gc_sync_wbufs(c); ++ if (ret) ++ goto out; ++ ret = ubifs_change_one_lp(c, lp.lnum, ++ c->leb_size, 0, 0, 0, ++ 0); ++ if (ret) ++ goto out; ++ } ++ ret = ubifs_leb_unmap(c, lp.lnum); ++ if (ret) ++ goto out; ++ ret = lp.lnum; ++ break; ++ } ++ ++ space_before = c->leb_size - wbuf->offs - wbuf->used; ++ if (wbuf->lnum == -1) ++ space_before = 0; ++ ++ ret = ubifs_garbage_collect_leb(c, &lp); ++ if (ret < 0) { ++ if (ret == -EAGAIN || ret == -ENOSPC) { ++ /* ++ * These codes are not errors, so we have to ++ * return the LEB to lprops.
But if the ++ * 'ubifs_return_leb()' function fails, its ++ * failure code is propagated to the caller ++ * instead of the original '-EAGAIN' or ++ * '-ENOSPC'. ++ */ ++ err = ubifs_return_leb(c, lp.lnum); ++ if (err) ++ ret = err; ++ break; ++ } ++ goto out; ++ } ++ ++ if (ret == LEB_FREED) { ++ /* An LEB has been freed and is ready for use */ ++ dbg_gc("LEB %d freed, return", lp.lnum); ++ ret = lp.lnum; ++ break; ++ } ++ ++ if (ret == LEB_FREED_IDX) { ++ /* ++ * This was an indexing LEB and it cannot be ++ * immediately used. And instead of requesting the ++ * commit straight away, we try to garbage collect some ++ * more. ++ */ ++ dbg_gc("indexing LEB %d freed, continue", lp.lnum); ++ continue; ++ } ++ ++ ubifs_assert(ret == LEB_RETAINED); ++ space_after = c->leb_size - wbuf->offs - wbuf->used; ++ dbg_gc("LEB %d retained, freed %d bytes", lp.lnum, ++ space_after - space_before); ++ ++ if (space_after > space_before) { ++ /* GC makes progress, keep working */ ++ min_space >>= 1; ++ if (min_space < c->dead_wm) ++ min_space = c->dead_wm; ++ continue; ++ } ++ ++ dbg_gc("did not make progress"); ++ ++ /* ++ * GC moved an LEB but has not made any progress. This means ++ * that the previous GC head LEB contained too little free space ++ * and the LEB which was GC'ed contained only large nodes which ++ * did not fit that space. ++ * ++ * We can do 2 things: ++ * 1. pick another LEB in the hope it'll contain a small node ++ * which will fit the space we have at the end of current GC ++ * head LEB, but there is no guarantee, so we try this out ++ * unless we have already been working for too long; ++ * 2. request an LEB with more dirty space, which will force ++ * 'ubifs_find_dirty_leb()' to start scanning the lprops ++ * table, instead of just picking one from the heap ++ * (previously it already picked the dirtiest LEB).
++ */ ++ if (i < SOFT_LEBS_LIMIT) { ++ dbg_gc("try again"); ++ continue; ++ } ++ ++ min_space <<= 1; ++ if (min_space > c->dark_wm) ++ min_space = c->dark_wm; ++ dbg_gc("set min. space to %d", min_space); ++ } ++ ++ if (ret == -ENOSPC && !list_empty(&c->idx_gc)) { ++ dbg_gc("no space, some index LEBs GC'ed, -EAGAIN"); ++ ubifs_commit_required(c); ++ ret = -EAGAIN; ++ } ++ ++ err = ubifs_wbuf_sync_nolock(wbuf); ++ if (!err) ++ err = ubifs_leb_unmap(c, c->gc_lnum); ++ if (err) { ++ ret = err; ++ goto out; ++ } ++out_unlock: ++ mutex_unlock(&wbuf->io_mutex); ++ return ret; ++ ++out: ++ ubifs_assert(ret < 0); ++ ubifs_assert(ret != -ENOSPC && ret != -EAGAIN); ++ ubifs_ro_mode(c, ret); ++ ubifs_wbuf_sync_nolock(wbuf); ++ mutex_unlock(&wbuf->io_mutex); ++ ubifs_return_leb(c, lp.lnum); ++ return ret; ++} ++ ++/** ++ * ubifs_gc_start_commit - garbage collection at start of commit. ++ * @c: UBIFS file-system description object ++ * ++ * If a LEB has only dirty and free space, then we may safely unmap it and make ++ * it free. Note, we cannot do this with indexing LEBs because dirty space may ++ * correspond to index nodes that are required for recovery. In that case, the ++ * LEB cannot be unmapped until after the next commit. ++ * ++ * This function returns %0 upon success and a negative error code upon failure. ++ */ ++int ubifs_gc_start_commit(struct ubifs_info *c) ++{ ++ struct ubifs_gced_idx_leb *idx_gc; ++ const struct ubifs_lprops *lp; ++ int err = 0, flags; ++ ++ ubifs_get_lprops(c); ++ ++ /* ++ * Unmap (non-index) freeable LEBs. Note that recovery requires that all ++ * wbufs are sync'd before this, which is done in 'do_commit()'.
++ */ ++ while (1) { ++ lp = ubifs_fast_find_freeable(c); ++ if (IS_ERR(lp)) { ++ err = PTR_ERR(lp); ++ goto out; ++ } ++ if (!lp) ++ break; ++ ubifs_assert(!(lp->flags & LPROPS_TAKEN)); ++ ubifs_assert(!(lp->flags & LPROPS_INDEX)); ++ err = ubifs_leb_unmap(c, lp->lnum); ++ if (err) ++ goto out; ++ lp = ubifs_change_lp(c, lp, c->leb_size, 0, lp->flags, 0); ++ if (IS_ERR(lp)) { ++ err = PTR_ERR(lp); ++ goto out; ++ } ++ ubifs_assert(!(lp->flags & LPROPS_TAKEN)); ++ ubifs_assert(!(lp->flags & LPROPS_INDEX)); ++ } ++ ++ /* Mark GC'd index LEBs OK to unmap after this commit finishes */ ++ list_for_each_entry(idx_gc, &c->idx_gc, list) ++ idx_gc->unmap = 1; ++ ++ /* Record index freeable LEBs for unmapping after commit */ ++ while (1) { ++ lp = ubifs_fast_find_frdi_idx(c); ++ if (IS_ERR(lp)) { ++ err = PTR_ERR(lp); ++ goto out; ++ } ++ if (!lp) ++ break; ++ idx_gc = kmalloc(sizeof(struct ubifs_gced_idx_leb), GFP_NOFS); ++ if (!idx_gc) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ubifs_assert(!(lp->flags & LPROPS_TAKEN)); ++ ubifs_assert(lp->flags & LPROPS_INDEX); ++ /* Don't release the LEB until after the next commit */ ++ flags = (lp->flags | LPROPS_TAKEN) ^ LPROPS_INDEX; ++ lp = ubifs_change_lp(c, lp, c->leb_size, 0, flags, 1); ++ if (IS_ERR(lp)) { ++ err = PTR_ERR(lp); ++ kfree(idx_gc); ++ goto out; ++ } ++ ubifs_assert(lp->flags & LPROPS_TAKEN); ++ ubifs_assert(!(lp->flags & LPROPS_INDEX)); ++ idx_gc->lnum = lp->lnum; ++ idx_gc->unmap = 1; ++ list_add(&idx_gc->list, &c->idx_gc); ++ } ++out: ++ ubifs_release_lprops(c); ++ return err; ++} ++ ++/** ++ * ubifs_gc_end_commit - garbage collection at end of commit. ++ * @c: UBIFS file-system description object ++ * ++ * This function completes out-of-place garbage collection of index LEBs. 
++ */ ++int ubifs_gc_end_commit(struct ubifs_info *c) ++{ ++ struct ubifs_gced_idx_leb *idx_gc, *tmp; ++ struct ubifs_wbuf *wbuf; ++ int err = 0; ++ ++ wbuf = &c->jheads[GCHD].wbuf; ++ mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); ++ list_for_each_entry_safe(idx_gc, tmp, &c->idx_gc, list) ++ if (idx_gc->unmap) { ++ dbg_gc("LEB %d", idx_gc->lnum); ++ err = ubifs_leb_unmap(c, idx_gc->lnum); ++ if (err) ++ goto out; ++ err = ubifs_change_one_lp(c, idx_gc->lnum, LPROPS_NC, ++ LPROPS_NC, 0, LPROPS_TAKEN, -1); ++ if (err) ++ goto out; ++ list_del(&idx_gc->list); ++ kfree(idx_gc); ++ } ++out: ++ mutex_unlock(&wbuf->io_mutex); ++ return err; ++} ++ ++/** ++ * ubifs_destroy_idx_gc - destroy idx_gc list. ++ * @c: UBIFS file-system description object ++ * ++ * This function destroys the @c->idx_gc list. It is called when unmounting ++ * so locks are not needed. Every entry is removed from the list and freed, ++ * and @c->idx_gc_cnt is decremented for each of them. ++ */ ++void ubifs_destroy_idx_gc(struct ubifs_info *c) ++{ ++ while (!list_empty(&c->idx_gc)) { ++ struct ubifs_gced_idx_leb *idx_gc; ++ ++ idx_gc = list_entry(c->idx_gc.next, struct ubifs_gced_idx_leb, ++ list); ++ c->idx_gc_cnt -= 1; ++ list_del(&idx_gc->list); ++ kfree(idx_gc); ++ } ++} ++ ++/** ++ * ubifs_get_idx_gc_leb - get a LEB from GC'd index LEB list. ++ * @c: UBIFS file-system description object ++ * ++ * Called during start commit so locks are not needed.
++ */ ++int ubifs_get_idx_gc_leb(struct ubifs_info *c) ++{ ++ struct ubifs_gced_idx_leb *idx_gc; ++ int lnum; ++ ++ if (list_empty(&c->idx_gc)) ++ return -ENOSPC; ++ idx_gc = list_entry(c->idx_gc.next, struct ubifs_gced_idx_leb, list); ++ lnum = idx_gc->lnum; ++ /* c->idx_gc_cnt is updated by the caller when lprops are updated */ ++ list_del(&idx_gc->list); ++ kfree(idx_gc); ++ return lnum; ++} +diff -Nurd linux-2.6.24/fs/ubifs/io.c ubifs-v2.6.24/fs/ubifs/io.c +--- linux-2.6.24/fs/ubifs/io.c 1970-01-01 02:00:00.000000000 +0200 ++++ ubifs-v2.6.24/fs/ubifs/io.c 2009-04-07 17:14:47.000000000 +0200 +@@ -0,0 +1,940 @@ ++/* ++ * This file is part of UBIFS. ++ * ++ * Copyright (C) 2006-2008 Nokia Corporation. ++ * Copyright (C) 2006, 2007 University of Szeged, Hungary ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published by ++ * the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, write to the Free Software Foundation, Inc., 51 ++ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ * ++ * Authors: Artem Bityutskiy (Битюцкий Артём) ++ * Adrian Hunter ++ * Zoltan Sogor ++ */ ++ ++/* ++ * This file implements UBIFS I/O subsystem which provides various I/O-related ++ * helper functions (reading/writing/checking/validating nodes) and implements ++ * write-buffering support. Write buffers help to save space which otherwise ++ * would have been wasted for padding to the nearest minimal I/O unit boundary. 
++ * Instead, data first goes to the write-buffer and is flushed when the ++ * buffer is full or when it is not used for some time (by timer). This is ++ * similar to the mechanism used by JFFS2. ++ * ++ * Write-buffers are defined by 'struct ubifs_wbuf' objects and protected by ++ * mutexes defined inside these objects. Since sometimes upper-level code ++ * has to lock the write-buffer (e.g. journal space reservation code), many ++ * functions related to write-buffers have "nolock" suffix which means that the ++ * caller has to lock the write-buffer before calling this function. ++ * ++ * UBIFS stores nodes at 64 bit-aligned addresses. If the node length is not ++ * aligned, UBIFS starts the next node from the aligned address, and the padded ++ * bytes may contain any rubbish. In other words, UBIFS does not put padding ++ * bytes in those small gaps. Common headers of nodes store real node lengths, ++ * not aligned lengths. Indexing nodes also store real lengths in branches. ++ * ++ * UBIFS uses padding when it pads to the next min. I/O unit. In this case it ++ * uses padding nodes or padding bytes, if the padding node does not fit. ++ * ++ * All UBIFS nodes are protected by CRC checksums and UBIFS checks all nodes ++ * every time they are read from the flash media. ++ */ ++ ++#include <linux/crc32.h> ++#include "ubifs.h" ++ ++/** ++ * ubifs_ro_mode - switch UBIFS to read-only mode. ++ * @c: UBIFS file-system description object ++ * @err: error code which is the reason of switching to R/O mode ++ */ ++void ubifs_ro_mode(struct ubifs_info *c, int err) ++{ ++ if (!c->ro_media) { ++ c->ro_media = 1; ++ c->no_chk_data_crc = 0; ++ ubifs_warn("switched to read-only mode, error %d", err); ++ dbg_dump_stack(); ++ } ++} ++ ++/** ++ * ubifs_check_node - check node.
++ * @c: UBIFS file-system description object ++ * @buf: node to check ++ * @lnum: logical eraseblock number ++ * @offs: offset within the logical eraseblock ++ * @quiet: print no messages ++ * @must_chk_crc: indicates whether to always check the CRC ++ * ++ * This function checks node magic number and CRC checksum. This function also ++ * validates node length to prevent UBIFS from becoming crazy when an attacker ++ * feeds it a file-system image with incorrect nodes. For example, too large ++ * node length in the common header could cause UBIFS to read memory outside of ++ * allocated buffer when checking the CRC checksum. ++ * ++ * This function may skip data nodes CRC checking if @c->no_chk_data_crc is ++ * true, which is controlled by corresponding UBIFS mount option. However, if ++ * @must_chk_crc is true, then @c->no_chk_data_crc is ignored and CRC is ++ * checked. Similarly, if @c->always_chk_crc is true, @c->no_chk_data_crc is ++ * ignored and CRC is checked. ++ * ++ * This function returns zero in case of success, %-EUCLEAN in case of bad ++ * CRC or magic, and %-EINVAL in case of bad node type or node length. 
++ */ ++int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, ++ int offs, int quiet, int must_chk_crc) ++{ ++ int err = -EINVAL, type, node_len; ++ uint32_t crc, node_crc, magic; ++ const struct ubifs_ch *ch = buf; ++ ++ ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0); ++ ubifs_assert(!(offs & 7) && offs < c->leb_size); ++ ++ magic = le32_to_cpu(ch->magic); ++ if (magic != UBIFS_NODE_MAGIC) { ++ if (!quiet) ++ ubifs_err("bad magic %#08x, expected %#08x", ++ magic, UBIFS_NODE_MAGIC); ++ err = -EUCLEAN; ++ goto out; ++ } ++ ++ type = ch->node_type; ++ if (type < 0 || type >= UBIFS_NODE_TYPES_CNT) { ++ if (!quiet) ++ ubifs_err("bad node type %d", type); ++ goto out; ++ } ++ ++ node_len = le32_to_cpu(ch->len); ++ if (node_len + offs > c->leb_size) ++ goto out_len; ++ ++ if (c->ranges[type].max_len == 0) { ++ if (node_len != c->ranges[type].len) ++ goto out_len; ++ } else if (node_len < c->ranges[type].min_len || ++ node_len > c->ranges[type].max_len) ++ goto out_len; ++ ++ if (!must_chk_crc && type == UBIFS_DATA_NODE && !c->always_chk_crc && ++ c->no_chk_data_crc) ++ return 0; ++ ++ crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); ++ node_crc = le32_to_cpu(ch->crc); ++ if (crc != node_crc) { ++ if (!quiet) ++ ubifs_err("bad CRC: calculated %#08x, read %#08x", ++ crc, node_crc); ++ err = -EUCLEAN; ++ goto out; ++ } ++ ++ return 0; ++ ++out_len: ++ if (!quiet) ++ ubifs_err("bad node length %d", node_len); ++out: ++ if (!quiet) { ++ ubifs_err("bad node at LEB %d:%d", lnum, offs); ++ dbg_dump_node(c, buf); ++ dbg_dump_stack(); ++ } ++ return err; ++} ++ ++/** ++ * ubifs_pad - pad flash space. ++ * @c: UBIFS file-system description object ++ * @buf: buffer to put padding to ++ * @pad: how many bytes to pad ++ * ++ * The flash media obliges us to write only in chunks of %c->min_io_size and ++ * when we have to write less data we add padding node to the write-buffer and ++ * pad it to the next minimal I/O unit's boundary. 
Padding nodes help when the ++ * media is being scanned. If the amount of wasted space is not enough to fit a ++ * padding node which takes %UBIFS_PAD_NODE_SZ bytes, we write padding bytes ++ * pattern (%UBIFS_PADDING_BYTE). ++ * ++ * Padding nodes are also used to fill gaps when the "commit-in-gaps" method is ++ * used. ++ */ ++void ubifs_pad(const struct ubifs_info *c, void *buf, int pad) ++{ ++ uint32_t crc; ++ ++ ubifs_assert(pad >= 0 && !(pad & 7)); ++ ++ if (pad >= UBIFS_PAD_NODE_SZ) { ++ struct ubifs_ch *ch = buf; ++ struct ubifs_pad_node *pad_node = buf; ++ ++ ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC); ++ ch->node_type = UBIFS_PAD_NODE; ++ ch->group_type = UBIFS_NO_NODE_GROUP; ++ ch->padding[0] = ch->padding[1] = 0; ++ ch->sqnum = 0; ++ ch->len = cpu_to_le32(UBIFS_PAD_NODE_SZ); ++ pad -= UBIFS_PAD_NODE_SZ; ++ pad_node->pad_len = cpu_to_le32(pad); ++ crc = crc32(UBIFS_CRC32_INIT, buf + 8, UBIFS_PAD_NODE_SZ - 8); ++ ch->crc = cpu_to_le32(crc); ++ memset(buf + UBIFS_PAD_NODE_SZ, 0, pad); ++ } else if (pad > 0) ++ /* Too little space, padding node won't fit */ ++ memset(buf, UBIFS_PADDING_BYTE, pad); ++} ++ ++/** ++ * next_sqnum - get next sequence number. ++ * @c: UBIFS file-system description object ++ */ ++static unsigned long long next_sqnum(struct ubifs_info *c) ++{ ++ unsigned long long sqnum; ++ ++ spin_lock(&c->cnt_lock); ++ sqnum = ++c->max_sqnum; ++ spin_unlock(&c->cnt_lock); ++ ++ if (unlikely(sqnum >= SQNUM_WARN_WATERMARK)) { ++ if (sqnum >= SQNUM_WATERMARK) { ++ ubifs_err("sequence number overflow %llu, end of life", ++ sqnum); ++ ubifs_ro_mode(c, -EINVAL); ++ } ++ ubifs_warn("running out of sequence numbers, end of life soon"); ++ } ++ ++ return sqnum; ++} ++ ++/** ++ * ubifs_prepare_node - prepare node to be written to flash. 
++ * @c: UBIFS file-system description object ++ * @node: the node to pad ++ * @len: node length ++ * @pad: if the buffer has to be padded ++ * ++ * This function prepares node at @node to be written to the media - it ++ * calculates node CRC, fills the common header, and adds proper padding up to ++ * the next minimum I/O unit if @pad is not zero. ++ */ ++void ubifs_prepare_node(struct ubifs_info *c, void *node, int len, int pad) ++{ ++ uint32_t crc; ++ struct ubifs_ch *ch = node; ++ unsigned long long sqnum = next_sqnum(c); ++ ++ ubifs_assert(len >= UBIFS_CH_SZ); ++ ++ ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC); ++ ch->len = cpu_to_le32(len); ++ ch->group_type = UBIFS_NO_NODE_GROUP; ++ ch->sqnum = cpu_to_le64(sqnum); ++ ch->padding[0] = ch->padding[1] = 0; ++ crc = crc32(UBIFS_CRC32_INIT, node + 8, len - 8); ++ ch->crc = cpu_to_le32(crc); ++ ++ if (pad) { ++ len = ALIGN(len, 8); ++ pad = ALIGN(len, c->min_io_size) - len; ++ ubifs_pad(c, node + len, pad); ++ } ++} ++ ++/** ++ * ubifs_prep_grp_node - prepare node of a group to be written to flash. ++ * @c: UBIFS file-system description object ++ * @node: the node to prepare ++ * @len: node length ++ * @last: indicates the last node of the group ++ * ++ * This function prepares node at @node to be written to the media - it ++ * calculates node CRC and fills the common header. 
++ */ ++void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last) ++{ ++ uint32_t crc; ++ struct ubifs_ch *ch = node; ++ unsigned long long sqnum = next_sqnum(c); ++ ++ ubifs_assert(len >= UBIFS_CH_SZ); ++ ++ ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC); ++ ch->len = cpu_to_le32(len); ++ if (last) ++ ch->group_type = UBIFS_LAST_OF_NODE_GROUP; ++ else ++ ch->group_type = UBIFS_IN_NODE_GROUP; ++ ch->sqnum = cpu_to_le64(sqnum); ++ ch->padding[0] = ch->padding[1] = 0; ++ crc = crc32(UBIFS_CRC32_INIT, node + 8, len - 8); ++ ch->crc = cpu_to_le32(crc); ++} ++ ++/** ++ * wbuf_timer_callback_nolock - write-buffer timer callback function. 
++ * @data: timer data (write-buffer descriptor) ++ * ++ * This function is called when the write-buffer timer expires. ++ */ ++static void wbuf_timer_callback_nolock(unsigned long data) ++{ ++ struct ubifs_wbuf *wbuf = (struct ubifs_wbuf *)data; ++ ++ wbuf->need_sync = 1; ++ wbuf->c->need_wbuf_sync = 1; ++ ubifs_wake_up_bgt(wbuf->c); ++} ++ ++/** ++ * new_wbuf_timer_nolock - start new write-buffer timer. ++ * @wbuf: write-buffer descriptor ++ */ ++static void new_wbuf_timer_nolock(struct ubifs_wbuf *wbuf) ++{ ++ ubifs_assert(!timer_pending(&wbuf->timer)); ++ ++ if (!wbuf->timeout) ++ return; ++ ++ wbuf->timer.expires = jiffies + wbuf->timeout; ++ add_timer(&wbuf->timer); ++} ++ ++/** ++ * cancel_wbuf_timer_nolock - cancel write-buffer timer. ++ * @wbuf: write-buffer descriptor ++ */ ++static void cancel_wbuf_timer_nolock(struct ubifs_wbuf *wbuf) ++{ ++ /* ++ * If the syncer is waiting for the lock (from the background thread's ++ * context) and another task is changing write-buffer then the syncing ++ * should be canceled. ++ */ ++ wbuf->need_sync = 0; ++ del_timer(&wbuf->timer); ++} ++ ++/** ++ * ubifs_wbuf_sync_nolock - synchronize write-buffer. ++ * @wbuf: write-buffer to synchronize ++ * ++ * This function synchronizes write-buffer @wbuf and returns zero in case of ++ * success or a negative error code in case of failure. 
++ */ ++int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf) ++{ ++ struct ubifs_info *c = wbuf->c; ++ int err, dirt; ++ ++ cancel_wbuf_timer_nolock(wbuf); ++ if (!wbuf->used || wbuf->lnum == -1) ++ /* Write-buffer is empty or not seeked */ ++ return 0; ++ ++ dbg_io("LEB %d:%d, %d bytes", ++ wbuf->lnum, wbuf->offs, wbuf->used); ++ ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY)); ++ ubifs_assert(!(wbuf->avail & 7)); ++ ubifs_assert(wbuf->offs + c->min_io_size <= c->leb_size); ++ ++ if (c->ro_media) ++ return -EROFS; ++ ++ ubifs_pad(c, wbuf->buf + wbuf->used, wbuf->avail); ++ err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, ++ c->min_io_size, wbuf->dtype); ++ if (err) { ++ ubifs_err("cannot write %d bytes to LEB %d:%d", ++ c->min_io_size, wbuf->lnum, wbuf->offs); ++ dbg_dump_stack(); ++ return err; ++ } ++ ++ dirt = wbuf->avail; ++ ++ spin_lock(&wbuf->lock); ++ wbuf->offs += c->min_io_size; ++ wbuf->avail = c->min_io_size; ++ wbuf->used = 0; ++ wbuf->next_ino = 0; ++ spin_unlock(&wbuf->lock); ++ ++ if (wbuf->sync_callback) ++ err = wbuf->sync_callback(c, wbuf->lnum, ++ c->leb_size - wbuf->offs, dirt); ++ return err; ++} ++ ++/** ++ * ubifs_wbuf_seek_nolock - seek write-buffer. ++ * @wbuf: write-buffer ++ * @lnum: logical eraseblock number to seek to ++ * @offs: logical eraseblock offset to seek to ++ * @dtype: data type ++ * ++ * This function targets the write buffer to logical eraseblock @lnum:@offs. ++ * The write-buffer is synchronized if it is not empty. Returns zero in case of ++ * success and a negative error code in case of failure. 
++ */ ++int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, ++ int dtype) ++{ ++ const struct ubifs_info *c = wbuf->c; ++ ++ dbg_io("LEB %d:%d", lnum, offs); ++ ubifs_assert(lnum >= 0 && lnum < c->leb_cnt); ++ ubifs_assert(offs >= 0 && offs <= c->leb_size); ++ ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7)); ++ ubifs_assert(lnum != wbuf->lnum); ++ ++ if (wbuf->used > 0) { ++ int err = ubifs_wbuf_sync_nolock(wbuf); ++ ++ if (err) ++ return err; ++ } ++ ++ spin_lock(&wbuf->lock); ++ wbuf->lnum = lnum; ++ wbuf->offs = offs; ++ wbuf->avail = c->min_io_size; ++ wbuf->used = 0; ++ spin_unlock(&wbuf->lock); ++ wbuf->dtype = dtype; ++ ++ return 0; ++} ++ ++/** ++ * ubifs_bg_wbufs_sync - synchronize write-buffers. ++ * @c: UBIFS file-system description object ++ * ++ * This function is called by background thread to synchronize write-buffers. ++ * Returns zero in case of success and a negative error code in case of ++ * failure. ++ */ ++int ubifs_bg_wbufs_sync(struct ubifs_info *c) ++{ ++ int err, i; ++ ++ if (!c->need_wbuf_sync) ++ return 0; ++ c->need_wbuf_sync = 0; ++ ++ if (c->ro_media) { ++ err = -EROFS; ++ goto out_timers; ++ } ++ ++ dbg_io("synchronize"); ++ for (i = 0; i < c->jhead_cnt; i++) { ++ struct ubifs_wbuf *wbuf = &c->jheads[i].wbuf; ++ ++ cond_resched(); ++ ++ /* ++ * If the mutex is locked then wbuf is being changed, so ++ * synchronization is not necessary. 
++ */ ++ if (mutex_is_locked(&wbuf->io_mutex)) ++ continue; ++ ++ mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); ++ if (!wbuf->need_sync) { ++ mutex_unlock(&wbuf->io_mutex); ++ continue; ++ } ++ ++ err = ubifs_wbuf_sync_nolock(wbuf); ++ mutex_unlock(&wbuf->io_mutex); ++ if (err) { ++ ubifs_err("cannot sync write-buffer, error %d", err); ++ ubifs_ro_mode(c, err); ++ goto out_timers; ++ } ++ } ++ ++ return 0; ++ ++out_timers: ++ /* Cancel all timers to prevent repeated errors */ ++ for (i = 0; i < c->jhead_cnt; i++) { ++ struct ubifs_wbuf *wbuf = &c->jheads[i].wbuf; ++ ++ mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); ++ cancel_wbuf_timer_nolock(wbuf); ++ mutex_unlock(&wbuf->io_mutex); ++ } ++ return err; ++} ++ ++/** ++ * ubifs_wbuf_write_nolock - write data to flash via write-buffer. ++ * @wbuf: write-buffer ++ * @buf: node to write ++ * @len: node length ++ * ++ * This function writes data to flash via write-buffer @wbuf. This means that ++ * the last piece of the node won't reach the flash media immediately if it ++ * does not take whole minimal I/O unit. Instead, the node will sit in RAM ++ * until the write-buffer is synchronized (e.g., by timer). ++ * ++ * This function returns zero in case of success and a negative error code in ++ * case of failure. If the node cannot be written because there is no more ++ * space in this logical eraseblock, %-ENOSPC is returned. 
++ */ ++int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) ++{ ++ struct ubifs_info *c = wbuf->c; ++ int err, written, n, aligned_len = ALIGN(len, 8), offs; ++ ++ dbg_io("%d bytes (%s) to wbuf at LEB %d:%d", len, ++ dbg_ntype(((struct ubifs_ch *)buf)->node_type), wbuf->lnum, ++ wbuf->offs + wbuf->used); ++ ubifs_assert(len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt); ++ ubifs_assert(wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0); ++ ubifs_assert(!(wbuf->offs & 7) && wbuf->offs <= c->leb_size); ++ ubifs_assert(wbuf->avail > 0 && wbuf->avail <= c->min_io_size); ++ ubifs_assert(mutex_is_locked(&wbuf->io_mutex)); ++ ++ if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) { ++ err = -ENOSPC; ++ goto out; ++ } ++ ++ cancel_wbuf_timer_nolock(wbuf); ++ ++ if (c->ro_media) ++ return -EROFS; ++ ++ if (aligned_len <= wbuf->avail) { ++ /* ++ * The node is not very large and fits entirely within ++ * write-buffer. ++ */ ++ memcpy(wbuf->buf + wbuf->used, buf, len); ++ ++ if (aligned_len == wbuf->avail) { ++ dbg_io("flush wbuf to LEB %d:%d", wbuf->lnum, ++ wbuf->offs); ++ err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, ++ wbuf->offs, c->min_io_size, ++ wbuf->dtype); ++ if (err) ++ goto out; ++ ++ spin_lock(&wbuf->lock); ++ wbuf->offs += c->min_io_size; ++ wbuf->avail = c->min_io_size; ++ wbuf->used = 0; ++ wbuf->next_ino = 0; ++ spin_unlock(&wbuf->lock); ++ } else { ++ spin_lock(&wbuf->lock); ++ wbuf->avail -= aligned_len; ++ wbuf->used += aligned_len; ++ spin_unlock(&wbuf->lock); ++ } ++ ++ goto exit; ++ } ++ ++ /* ++ * The node is large enough and does not fit entirely within current ++ * minimal I/O unit. We have to fill and flush write-buffer and switch ++ * to the next min. I/O unit. 
++ */ ++ dbg_io("flush wbuf to LEB %d:%d", wbuf->lnum, wbuf->offs); ++ memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail); ++ err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, ++ c->min_io_size, wbuf->dtype); ++ if (err) ++ goto out; ++ ++ offs = wbuf->offs + c->min_io_size; ++ len -= wbuf->avail; ++ aligned_len -= wbuf->avail; ++ written = wbuf->avail; ++ ++ /* ++ * The remaining data may take more whole min. I/O units, so write the ++ * remains multiple to min. I/O unit size directly to the flash media. ++ * We align node length to 8-byte boundary because we anyway flash wbuf ++ * if the remaining space is less than 8 bytes. ++ */ ++ n = aligned_len >> c->min_io_shift; ++ if (n) { ++ n <<= c->min_io_shift; ++ dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, offs); ++ err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, offs, n, ++ wbuf->dtype); ++ if (err) ++ goto out; ++ offs += n; ++ aligned_len -= n; ++ len -= n; ++ written += n; ++ } ++ ++ spin_lock(&wbuf->lock); ++ if (aligned_len) ++ /* ++ * And now we have what's left and what does not take whole ++ * min. I/O unit, so write it to the write-buffer and we are ++ * done. ++ */ ++ memcpy(wbuf->buf, buf + written, len); ++ ++ wbuf->offs = offs; ++ wbuf->used = aligned_len; ++ wbuf->avail = c->min_io_size - aligned_len; ++ wbuf->next_ino = 0; ++ spin_unlock(&wbuf->lock); ++ ++exit: ++ if (wbuf->sync_callback) { ++ int free = c->leb_size - wbuf->offs - wbuf->used; ++ ++ err = wbuf->sync_callback(c, wbuf->lnum, free, 0); ++ if (err) ++ goto out; ++ } ++ ++ if (wbuf->used) ++ new_wbuf_timer_nolock(wbuf); ++ ++ return 0; ++ ++out: ++ ubifs_err("cannot write %d bytes to LEB %d:%d, error %d", ++ len, wbuf->lnum, wbuf->offs, err); ++ dbg_dump_node(c, buf); ++ dbg_dump_stack(); ++ dbg_dump_leb(c, wbuf->lnum); ++ return err; ++} ++ ++/** ++ * ubifs_write_node - write node to the media. 
++ * @c: UBIFS file-system description object ++ * @buf: the node to write ++ * @len: node length ++ * @lnum: logical eraseblock number ++ * @offs: offset within the logical eraseblock ++ * @dtype: node life-time hint (%UBI_LONGTERM, %UBI_SHORTTERM, %UBI_UNKNOWN) ++ * ++ * This function automatically fills node magic number, assigns sequence ++ * number, and calculates node CRC checksum. The length of the @buf buffer has ++ * to be aligned to the minimal I/O unit size. This function automatically ++ * appends padding node and padding bytes if needed. Returns zero in case of ++ * success and a negative error code in case of failure. ++ */ ++int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum, ++ int offs, int dtype) ++{ ++ int err, buf_len = ALIGN(len, c->min_io_size); ++ ++ dbg_io("LEB %d:%d, %s, length %d (aligned %d)", ++ lnum, offs, dbg_ntype(((struct ubifs_ch *)buf)->node_type), len, ++ buf_len); ++ ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0); ++ ubifs_assert(offs % c->min_io_size == 0 && offs < c->leb_size); ++ ++ if (c->ro_media) ++ return -EROFS; ++ ++ ubifs_prepare_node(c, buf, len, 1); ++ err = ubi_leb_write(c->ubi, lnum, buf, offs, buf_len, dtype); ++ if (err) { ++ ubifs_err("cannot write %d bytes to LEB %d:%d, error %d", ++ buf_len, lnum, offs, err); ++ dbg_dump_node(c, buf); ++ dbg_dump_stack(); ++ } ++ ++ return err; ++} ++ ++/** ++ * ubifs_read_node_wbuf - read node from the media or write-buffer. ++ * @wbuf: wbuf to check for un-written data ++ * @buf: buffer to read to ++ * @type: node type ++ * @len: node length ++ * @lnum: logical eraseblock number ++ * @offs: offset within the logical eraseblock ++ * ++ * This function reads a node of known type and length, checks it and stores ++ * in @buf. If the node partially or fully sits in the write-buffer, this ++ * function takes data from the buffer, otherwise it reads the flash media. 
++ * Returns zero in case of success, %-EUCLEAN if CRC mismatched and a negative ++ * error code in case of failure. ++ */ ++int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len, ++ int lnum, int offs) ++{ ++ const struct ubifs_info *c = wbuf->c; ++ int err, rlen, overlap; ++ struct ubifs_ch *ch = buf; ++ ++ dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len); ++ ubifs_assert(wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0); ++ ubifs_assert(!(offs & 7) && offs < c->leb_size); ++ ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT); ++ ++ spin_lock(&wbuf->lock); ++ overlap = (lnum == wbuf->lnum && offs + len > wbuf->offs); ++ if (!overlap) { ++ /* We may safely unlock the write-buffer and read the data */ ++ spin_unlock(&wbuf->lock); ++ return ubifs_read_node(c, buf, type, len, lnum, offs); ++ } ++ ++ /* Don't read under wbuf */ ++ rlen = wbuf->offs - offs; ++ if (rlen < 0) ++ rlen = 0; ++ ++ /* Copy the rest from the write-buffer */ ++ memcpy(buf + rlen, wbuf->buf + offs + rlen - wbuf->offs, len - rlen); ++ spin_unlock(&wbuf->lock); ++ ++ if (rlen > 0) { ++ /* Read everything that goes before write-buffer */ ++ err = ubi_read(c->ubi, lnum, buf, offs, rlen); ++ if (err && err != -EBADMSG) { ++ ubifs_err("failed to read node %d from LEB %d:%d, " ++ "error %d", type, lnum, offs, err); ++ dbg_dump_stack(); ++ return err; ++ } ++ } ++ ++ if (type != ch->node_type) { ++ ubifs_err("bad node type (%d but expected %d)", ++ ch->node_type, type); ++ goto out; ++ } ++ ++ err = ubifs_check_node(c, buf, lnum, offs, 0, 0); ++ if (err) { ++ ubifs_err("expected node type %d", type); ++ return err; ++ } ++ ++ rlen = le32_to_cpu(ch->len); ++ if (rlen != len) { ++ ubifs_err("bad node length %d, expected %d", rlen, len); ++ goto out; ++ } ++ ++ return 0; ++ ++out: ++ ubifs_err("bad node at LEB %d:%d", lnum, offs); ++ dbg_dump_node(c, buf); ++ dbg_dump_stack(); ++ return -EINVAL; ++} ++ ++/** ++ * ubifs_read_node - read node. 
++ * @c: UBIFS file-system description object ++ * @buf: buffer to read to ++ * @type: node type ++ * @len: node length (not aligned) ++ * @lnum: logical eraseblock number ++ * @offs: offset within the logical eraseblock ++ * ++ * This function reads a node of known type and length, checks it and ++ * stores in @buf. Returns zero in case of success, %-EUCLEAN if CRC mismatched ++ * and a negative error code in case of failure. ++ */ ++int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len, ++ int lnum, int offs) ++{ ++ int err, l; ++ struct ubifs_ch *ch = buf; ++ ++ dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len); ++ ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0); ++ ubifs_assert(len >= UBIFS_CH_SZ && offs + len <= c->leb_size); ++ ubifs_assert(!(offs & 7) && offs < c->leb_size); ++ ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT); ++ ++ err = ubi_read(c->ubi, lnum, buf, offs, len); ++ if (err && err != -EBADMSG) { ++ ubifs_err("cannot read node %d from LEB %d:%d, error %d", ++ type, lnum, offs, err); ++ return err; ++ } ++ ++ if (type != ch->node_type) { ++ ubifs_err("bad node type (%d but expected %d)", ++ ch->node_type, type); ++ goto out; ++ } ++ ++ err = ubifs_check_node(c, buf, lnum, offs, 0, 0); ++ if (err) { ++ ubifs_err("expected node type %d", type); ++ return err; ++ } ++ ++ l = le32_to_cpu(ch->len); ++ if (l != len) { ++ ubifs_err("bad node length %d, expected %d", l, len); ++ goto out; ++ } ++ ++ return 0; ++ ++out: ++ ubifs_err("bad node at LEB %d:%d", lnum, offs); ++ dbg_dump_node(c, buf); ++ dbg_dump_stack(); ++ return -EINVAL; ++} ++ ++/** ++ * ubifs_wbuf_init - initialize write-buffer. ++ * @c: UBIFS file-system description object ++ * @wbuf: write-buffer to initialize ++ * ++ * This function initializes write buffer. Returns zero in case of success and ++ * %-ENOMEM in case of failure. 
++ */ ++int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf) ++{ ++ size_t size; ++ ++ wbuf->buf = kmalloc(c->min_io_size, GFP_KERNEL); ++ if (!wbuf->buf) ++ return -ENOMEM; ++ ++ size = (c->min_io_size / UBIFS_CH_SZ + 1) * sizeof(ino_t); ++ wbuf->inodes = kmalloc(size, GFP_KERNEL); ++ if (!wbuf->inodes) { ++ kfree(wbuf->buf); ++ wbuf->buf = NULL; ++ return -ENOMEM; ++ } ++ ++ wbuf->used = 0; ++ wbuf->lnum = wbuf->offs = -1; ++ wbuf->avail = c->min_io_size; ++ wbuf->dtype = UBI_UNKNOWN; ++ wbuf->sync_callback = NULL; ++ mutex_init(&wbuf->io_mutex); ++ spin_lock_init(&wbuf->lock); ++ ++ wbuf->c = c; ++ init_timer(&wbuf->timer); ++ wbuf->timer.function = wbuf_timer_callback_nolock; ++ wbuf->timer.data = (unsigned long)wbuf; ++ wbuf->timeout = DEFAULT_WBUF_TIMEOUT; ++ wbuf->next_ino = 0; ++ ++ return 0; ++} ++ ++/** ++ * ubifs_wbuf_add_ino_nolock - add an inode number into the wbuf inode array. ++ * @wbuf: the write-buffer whereto add ++ * @inum: the inode number ++ * ++ * This function adds an inode number to the inode array of the write-buffer. ++ */ ++void ubifs_wbuf_add_ino_nolock(struct ubifs_wbuf *wbuf, ino_t inum) ++{ ++ if (!wbuf->buf) ++ /* NOR flash or something similar */ ++ return; ++ ++ spin_lock(&wbuf->lock); ++ if (wbuf->used) ++ wbuf->inodes[wbuf->next_ino++] = inum; ++ spin_unlock(&wbuf->lock); ++} ++ ++/** ++ * wbuf_has_ino - returns if the wbuf contains data from the inode. ++ * @wbuf: the write-buffer ++ * @inum: the inode number ++ * ++ * This function returns with %1 if the write-buffer contains some data from the ++ * given inode otherwise it returns with %0. ++ */ ++static int wbuf_has_ino(struct ubifs_wbuf *wbuf, ino_t inum) ++{ ++ int i, ret = 0; ++ ++ spin_lock(&wbuf->lock); ++ for (i = 0; i < wbuf->next_ino; i++) ++ if (inum == wbuf->inodes[i]) { ++ ret = 1; ++ break; ++ } ++ spin_unlock(&wbuf->lock); ++ ++ return ret; ++} ++ ++/** ++ * ubifs_sync_wbufs_by_inode - synchronize write-buffers for an inode. 
++ * @c: UBIFS file-system description object ++ * @inode: inode to synchronize ++ * ++ * This function synchronizes write-buffers which contain nodes belonging to ++ * @inode. Returns zero in case of success and a negative error code in case of ++ * failure. ++ */ ++int ubifs_sync_wbufs_by_inode(struct ubifs_info *c, struct inode *inode) ++{ ++ int i, err = 0; ++ ++ for (i = 0; i < c->jhead_cnt; i++) { ++ struct ubifs_wbuf *wbuf = &c->jheads[i].wbuf; ++ ++ if (i == GCHD) ++ /* ++ * GC head is special, do not look at it. Even if the ++ * head contains something related to this inode, it is ++ * a _copy_ of corresponding on-flash node which sits ++ * somewhere else. ++ */ ++ continue; ++ ++ if (!wbuf_has_ino(wbuf, inode->i_ino)) ++ continue; ++ ++ mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); ++ if (wbuf_has_ino(wbuf, inode->i_ino)) ++ err = ubifs_wbuf_sync_nolock(wbuf); ++ mutex_unlock(&wbuf->io_mutex); ++ ++ if (err) { ++ ubifs_ro_mode(c, err); ++ return err; ++ } ++ } ++ return 0; ++} +diff -Nurd linux-2.6.24/fs/ubifs/ioctl.c ubifs-v2.6.24/fs/ubifs/ioctl.c +--- linux-2.6.24/fs/ubifs/ioctl.c 1970-01-01 02:00:00.000000000 +0200 ++++ ubifs-v2.6.24/fs/ubifs/ioctl.c 2009-04-07 17:14:47.000000000 +0200 +@@ -0,0 +1,196 @@ ++/* ++ * This file is part of UBIFS. ++ * ++ * Copyright (C) 2006-2008 Nokia Corporation. ++ * Copyright (C) 2006, 2007 University of Szeged, Hungary ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published by ++ * the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. 
++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, write to the Free Software Foundation, Inc., 51 ++ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ * ++ * Authors: Zoltan Sogor ++ * Artem Bityutskiy (Битюцкий Артём) ++ * Adrian Hunter ++ */ ++ ++/* This file implements EXT2-compatible extended attribute ioctl() calls */ ++ ++#include <linux/compat.h> ++#include <linux/smp_lock.h> ++#include "ubifs.h" ++ ++/** ++ * ubifs_set_inode_flags - set VFS inode flags. ++ * @inode: VFS inode to set flags for ++ * ++ * This function propagates flags from UBIFS inode object to VFS inode object. ++ */ ++void ubifs_set_inode_flags(struct inode *inode) ++{ ++ unsigned int flags = ubifs_inode(inode)->flags; ++ ++ inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | S_DIRSYNC); ++ if (flags & UBIFS_SYNC_FL) ++ inode->i_flags |= S_SYNC; ++ if (flags & UBIFS_APPEND_FL) ++ inode->i_flags |= S_APPEND; ++ if (flags & UBIFS_IMMUTABLE_FL) ++ inode->i_flags |= S_IMMUTABLE; ++ if (flags & UBIFS_DIRSYNC_FL) ++ inode->i_flags |= S_DIRSYNC; ++} ++ ++/* ++ * ioctl2ubifs - convert ioctl inode flags to UBIFS inode flags. ++ * @ioctl_flags: flags to convert ++ * ++ * This function converts ioctl flags (@FS_COMPR_FL, etc) to UBIFS inode flags ++ * (@UBIFS_COMPR_FL, etc). ++ */ ++static int ioctl2ubifs(int ioctl_flags) ++{ ++ int ubifs_flags = 0; ++ ++ if (ioctl_flags & FS_COMPR_FL) ++ ubifs_flags |= UBIFS_COMPR_FL; ++ if (ioctl_flags & FS_SYNC_FL) ++ ubifs_flags |= UBIFS_SYNC_FL; ++ if (ioctl_flags & FS_APPEND_FL) ++ ubifs_flags |= UBIFS_APPEND_FL; ++ if (ioctl_flags & FS_IMMUTABLE_FL) ++ ubifs_flags |= UBIFS_IMMUTABLE_FL; ++ if (ioctl_flags & FS_DIRSYNC_FL) ++ ubifs_flags |= UBIFS_DIRSYNC_FL; ++ ++ return ubifs_flags; ++} ++ ++/* ++ * ubifs2ioctl - convert UBIFS inode flags to ioctl inode flags. 
++ * @ubifs_flags: flags to convert ++ * ++ * This function converts UBIFS inode flags (@UBIFS_COMPR_FL, etc) to ioctl flags ++ * (@FS_COMPR_FL, etc). ++ */ ++static int ubifs2ioctl(int ubifs_flags) ++{ ++ int ioctl_flags = 0; ++ ++ if (ubifs_flags & UBIFS_COMPR_FL) ++ ioctl_flags |= FS_COMPR_FL; ++ if (ubifs_flags & UBIFS_SYNC_FL) ++ ioctl_flags |= FS_SYNC_FL; ++ if (ubifs_flags & UBIFS_APPEND_FL) ++ ioctl_flags |= FS_APPEND_FL; ++ if (ubifs_flags & UBIFS_IMMUTABLE_FL) ++ ioctl_flags |= FS_IMMUTABLE_FL; ++ if (ubifs_flags & UBIFS_DIRSYNC_FL) ++ ioctl_flags |= FS_DIRSYNC_FL; ++ ++ return ioctl_flags; ++} ++ ++static int setflags(struct inode *inode, int flags) ++{ ++ int oldflags, err, release; ++ struct ubifs_inode *ui = ubifs_inode(inode); ++ struct ubifs_info *c = inode->i_sb->s_fs_info; ++ struct ubifs_budget_req req = { .dirtied_ino = 1, ++ .dirtied_ino_d = ui->data_len }; ++ ++ err = ubifs_budget_space(c, &req); ++ if (err) ++ return err; ++ ++ /* ++ * The IMMUTABLE and APPEND_ONLY flags can only be changed by ++ * the relevant capability. 
++ */ ++ mutex_lock(&ui->ui_mutex); ++ oldflags = ubifs2ioctl(ui->flags); ++ if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { ++ if (!capable(CAP_LINUX_IMMUTABLE)) { ++ err = -EPERM; ++ goto out_unlock; ++ } ++ } ++ ++ ui->flags = ioctl2ubifs(flags); ++ ubifs_set_inode_flags(inode); ++ inode->i_ctime = ubifs_current_time(inode); ++ release = ui->dirty; ++ mark_inode_dirty_sync(inode); ++ mutex_unlock(&ui->ui_mutex); ++ ++ if (release) ++ ubifs_release_budget(c, &req); ++ if (IS_SYNC(inode)) ++ err = write_inode_now(inode, 1); ++ return err; ++ ++out_unlock: ++ ubifs_err("can't modify inode %lu attributes", inode->i_ino); ++ mutex_unlock(&ui->ui_mutex); ++ ubifs_release_budget(c, &req); ++ return err; ++} ++ ++long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) ++{ ++ int flags; ++ struct inode *inode = file->f_path.dentry->d_inode; ++ ++ switch (cmd) { ++ case FS_IOC_GETFLAGS: ++ flags = ubifs2ioctl(ubifs_inode(inode)->flags); ++ ++ dbg_gen("get flags: %#x, i_flags %#x", flags, inode->i_flags); ++ return put_user(flags, (int __user *) arg); ++ ++ case FS_IOC_SETFLAGS: { ++ if (IS_RDONLY(inode)) ++ return -EROFS; ++ ++ if (!is_owner_or_cap(inode)) ++ return -EACCES; ++ ++ if (get_user(flags, (int __user *) arg)) ++ return -EFAULT; ++ ++ if (!S_ISDIR(inode->i_mode)) ++ flags &= ~FS_DIRSYNC_FL; ++ ++ dbg_gen("set flags: %#x, i_flags %#x", flags, inode->i_flags); ++ return setflags(inode, flags); ++ } ++ ++ default: ++ return -ENOTTY; ++ } ++} ++ ++#ifdef CONFIG_COMPAT ++long ubifs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) ++{ ++ switch (cmd) { ++ case FS_IOC32_GETFLAGS: ++ cmd = FS_IOC_GETFLAGS; ++ break; ++ case FS_IOC32_SETFLAGS: ++ cmd = FS_IOC_SETFLAGS; ++ break; ++ default: ++ return -ENOIOCTLCMD; ++ } ++ return ubifs_ioctl(file, cmd, (unsigned long)compat_ptr(arg)); ++} ++#endif +diff -Nurd linux-2.6.24/fs/ubifs/journal.c ubifs-v2.6.24/fs/ubifs/journal.c +--- linux-2.6.24/fs/ubifs/journal.c 1970-01-01 
02:00:00.000000000 +0200 ++++ ubifs-v2.6.24/fs/ubifs/journal.c 2009-04-07 17:14:47.000000000 +0200 +@@ -0,0 +1,1442 @@ ++/* ++ * This file is part of UBIFS. ++ * ++ * Copyright (C) 2006-2008 Nokia Corporation. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published by ++ * the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, write to the Free Software Foundation, Inc., 51 ++ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ * ++ * Authors: Artem Bityutskiy (Битюцкий Артём) ++ * Adrian Hunter ++ */ ++ ++/* ++ * This file implements UBIFS journal. ++ * ++ * The journal consists of 2 parts - the log and bud LEBs. The log has fixed ++ * length and position, while a bud logical eraseblock is any LEB in the main ++ * area. Buds contain file system data - data nodes, inode nodes, etc. The log ++ * contains only references to buds and some other stuff like commit ++ * start node. The idea is that when we commit the journal, we do ++ * not copy the data, the buds just become indexed. Since after the commit the ++ * nodes in bud eraseblocks become leaf nodes of the file system index tree, we ++ * use term "bud". Analogy is obvious, bud eraseblocks contain nodes which will ++ * become leafs in the future. ++ * ++ * The journal is multi-headed because we want to write data to the journal as ++ * optimally as possible. It is nice to have nodes belonging to the same inode ++ * in one LEB, so we may write data owned by different inodes to different ++ * journal heads, although at present only one data head is used. 
++ *
++ * For recovery reasons, the base head contains all inode nodes, all directory
++ * entry nodes and all truncate nodes. This means that the other heads contain
++ * only data nodes.
++ *
++ * Bud LEBs may be half-indexed. For example, if the bud was not full at the
++ * time of commit, the bud is retained to continue to be used in the journal,
++ * even though the "front" of the LEB is now indexed. In that case, the log
++ * reference contains the offset where the bud starts for the purposes of the
++ * journal.
++ *
++ * The journal size has to be limited, because the larger is the journal, the
++ * longer it takes to mount UBIFS (scanning the journal) and the more memory it
++ * takes (indexing in the TNC).
++ *
++ * All the journal write operations like 'ubifs_jnl_update()' here, which write
++ * multiple UBIFS nodes to the journal at one go, are atomic with respect to
++ * unclean reboots. Should the unclean reboot happen, the recovery code drops
++ * all the nodes.
++ */
++
++#include "ubifs.h"
++
++/**
++ * zero_ino_node_unused - zero out unused fields of an on-flash inode node.
++ * @ino: the inode to zero out
++ *
++ * Clears 4 bytes at @ino->padding1 and 26 bytes at @ino->padding2.
++ */
++static inline void zero_ino_node_unused(struct ubifs_ino_node *ino)
++{
++	memset(ino->padding1, 0, 4);
++	memset(ino->padding2, 0, 26);
++}
++
++/**
++ * zero_dent_node_unused - zero out unused fields of an on-flash directory
++ * entry node.
++ * @dent: the directory entry to zero out
++ *
++ * Sets @dent->padding1 to zero and clears 4 bytes at @dent->padding2.
++ */
++static inline void zero_dent_node_unused(struct ubifs_dent_node *dent)
++{
++	dent->padding1 = 0;
++	memset(dent->padding2, 0, 4);
++}
++
++/**
++ * zero_data_node_unused - zero out unused fields of an on-flash data node.
++ * @data: the data node to zero out
++ *
++ * Clears 2 bytes at @data->padding.
++ */
++static inline void zero_data_node_unused(struct ubifs_data_node *data)
++{
++	memset(data->padding, 0, 2);
++}
++
++/**
++ * zero_trun_node_unused - zero out unused fields of an on-flash truncation
++ * node.
++ * @trun: the truncation node to zero out
++ */
++static inline void zero_trun_node_unused(struct ubifs_trun_node *trun)
++{
++	memset(trun->padding, 0, 12);
++}
++
++/**
++ * reserve_space - reserve space in the journal.
++ * @c: UBIFS file-system description object
++ * @jhead: journal head number
++ * @len: node length
++ *
++ * This function reserves space in journal head @jhead. If the reservation
++ * succeeded, the journal head stays locked and later has to be unlocked using
++ * 'release_head()'. The 'write_node()' and 'write_head()' helpers in this file
++ * contain no unlock call of their own; their callers call 'release_head()'.
++ * On every failure path the write-buffer mutex is dropped before returning.
++ * Returns zero in case of success, %-EAGAIN if commit has to be done, and
++ * other negative error codes in case of other failures.
++ *
++ * If no LEB with enough free space is found, garbage collection is run with
++ * the write-buffer mutex dropped; when it reports %-ENOSPC the whole attempt
++ * is repeated up to two more times before %-ENOSPC is returned.
++ */
++static int reserve_space(struct ubifs_info *c, int jhead, int len)
++{
++	int err = 0, err1, retries = 0, avail, lnum, offs, squeeze;
++	struct ubifs_wbuf *wbuf = &c->jheads[jhead].wbuf;
++
++	/*
++	 * Typically, the base head has smaller nodes written to it, so it is
++	 * better to try to allocate space at the ends of eraseblocks. This is
++	 * what the squeeze parameter does.
++	 */
++	squeeze = (jhead == BASEHD);
++again:
++	mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
++
++	if (c->ro_media) {
++		err = -EROFS;
++		goto out_unlock;
++	}
++
++	avail = c->leb_size - wbuf->offs - wbuf->used;
++	if (wbuf->lnum != -1 && avail >= len)
++		return 0; /* enough room already; io_mutex stays locked */
++
++	/*
++	 * Write buffer wasn't seek'ed or there is not enough space - look for
++	 * an LEB with some empty space.
++	 */
++	lnum = ubifs_find_free_space(c, len, &offs, squeeze);
++	if (lnum >= 0) {
++		/* Found an LEB, add it to the journal head */
++		err = ubifs_add_bud_to_log(c, jhead, lnum, offs);
++		if (err)
++			goto out_return;
++		/* A new bud was successfully allocated and added to the log */
++		goto out;
++	}
++
++	err = lnum;
++	if (err != -ENOSPC)
++		goto out_unlock;
++
++	/*
++	 * No free space, we have to run garbage collector to make
++	 * some. But the write-buffer mutex has to be unlocked because
++	 * GC also takes it.
++	 */
++	dbg_jnl("no free space jhead %d, run GC", jhead);
++	mutex_unlock(&wbuf->io_mutex);
++
++	lnum = ubifs_garbage_collect(c, 0);
++	if (lnum < 0) {
++		err = lnum;
++		if (err != -ENOSPC)
++			return err; /* io_mutex was already dropped above */
++
++		/*
++		 * GC could not make a free LEB. But someone else may
++		 * have allocated new bud for this journal head,
++		 * because we dropped @wbuf->io_mutex, so try once
++		 * again.
++		 */
++		dbg_jnl("GC couldn't make a free LEB for jhead %d", jhead);
++		if (retries++ < 2) {
++			dbg_jnl("retry (%d)", retries);
++			goto again;
++		}
++
++		dbg_jnl("return -ENOSPC");
++		return err;
++	}
++
++	mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
++	dbg_jnl("got LEB %d for jhead %d", lnum, jhead);
++	avail = c->leb_size - wbuf->offs - wbuf->used;
++
++	if (wbuf->lnum != -1 && avail >= len) {
++		/*
++		 * Someone else has switched the journal head and we have
++		 * enough space now. This happens when more than one process is
++		 * trying to write to the same journal head at the same time.
++		 */
++		dbg_jnl("return LEB %d back, already have LEB %d:%d",
++			lnum, wbuf->lnum, wbuf->offs + wbuf->used);
++		err = ubifs_return_leb(c, lnum);
++		if (err)
++			goto out_unlock;
++		return 0;
++	}
++
++	err = ubifs_add_bud_to_log(c, jhead, lnum, 0);
++	if (err)
++		goto out_return;
++	offs = 0;
++
++out:
++	err = ubifs_wbuf_seek_nolock(wbuf, lnum, offs, wbuf->dtype);
++	if (err)
++		goto out_unlock;
++
++	return 0;
++
++out_unlock:
++	mutex_unlock(&wbuf->io_mutex);
++	return err;
++
++out_return:
++	/* An error occurred and the LEB has to be returned to lprops */
++	ubifs_assert(err < 0);
++	err1 = ubifs_return_leb(c, lnum);
++	if (err1 && err == -EAGAIN)
++		/*
++		 * Return original error code only if it is not %-EAGAIN,
++		 * which is not really an error. Otherwise, return the error
++		 * code of 'ubifs_return_leb()'.
++		 */
++		err = err1;
++	mutex_unlock(&wbuf->io_mutex);
++	return err;
++}
++
++/**
++ * write_node - write node to a journal head.
++ * @c: UBIFS file-system description object
++ * @jhead: journal head
++ * @node: node to write
++ * @len: node length
++ * @lnum: LEB number written is returned here
++ * @offs: offset written is returned here
++ *
++ * This function writes a node to reserved space of journal head @jhead.
++ * The node is passed through 'ubifs_prepare_node()' before it is handed to
++ * the write-buffer. @lnum and @offs are filled in from the write-buffer
++ * position before the write, so they are set even if the write fails.
++ * Returns zero in case of success and a negative error code in case of
++ * failure.
++ */
++static int write_node(struct ubifs_info *c, int jhead, void *node, int len,
++		      int *lnum, int *offs)
++{
++	struct ubifs_wbuf *wbuf = &c->jheads[jhead].wbuf;
++
++	ubifs_assert(jhead != GCHD);
++
++	*lnum = c->jheads[jhead].wbuf.lnum;
++	*offs = c->jheads[jhead].wbuf.offs + c->jheads[jhead].wbuf.used;
++
++	dbg_jnl("jhead %d, LEB %d:%d, len %d", jhead, *lnum, *offs, len);
++	ubifs_prepare_node(c, node, len, 0);
++
++	return ubifs_wbuf_write_nolock(wbuf, node, len);
++}
++
++/**
++ * write_head - write data to a journal head.
++ * @c: UBIFS file-system description object
++ * @jhead: journal head
++ * @buf: buffer to write
++ * @len: length to write
++ * @lnum: LEB number written is returned here
++ * @offs: offset written is returned here
++ * @sync: non-zero if the write-buffer has to by synchronized
++ *
++ * This function is the same as 'write_node()' but it does not assume the
++ * buffer it is writing is a node, so it does not prepare it (which means
++ * initializing common header and calculating CRC).
++ */
++static int write_head(struct ubifs_info *c, int jhead, void *buf, int len,
++		      int *lnum, int *offs, int sync)
++{
++	int err;
++	struct ubifs_wbuf *wbuf = &c->jheads[jhead].wbuf;
++
++	ubifs_assert(jhead != GCHD);
++
++	*lnum = c->jheads[jhead].wbuf.lnum; /* position is sampled before the write */
++	*offs = c->jheads[jhead].wbuf.offs + c->jheads[jhead].wbuf.used;
++	dbg_jnl("jhead %d, LEB %d:%d, len %d", jhead, *lnum, *offs, len);
++
++	err = ubifs_wbuf_write_nolock(wbuf, buf, len);
++	if (err)
++		return err;
++	if (sync) /* the caller asked for the write-buffer to be synchronized */
++		err = ubifs_wbuf_sync_nolock(wbuf);
++	return err;
++}
++
++/**
++ * make_reservation - reserve journal space.
++ * @c: UBIFS file-system description object
++ * @jhead: journal head
++ * @len: how many bytes to reserve
++ *
++ * This function makes space reservation in journal head @jhead. The function
++ * takes the commit lock and locks the journal head, and the caller has to
++ * unlock the head and finish the reservation with 'finish_reservation()'.
++ * Returns zero in case of success and a negative error code in case of
++ * failure.
++ *
++ * Note, the journal head may be unlocked as soon as the data is written, while
++ * the commit lock has to be released after the data has been added to the
++ * TNC.
++ *
++ * When 'reserve_space()' fails, @c->commit_sem is released again before the
++ * error is examined, so it is not held when an error is returned. %-ENOSPC
++ * is turned into %-EAGAIN for the first two occurrences; every %-EAGAIN runs
++ * a commit and re-tries. A warning is printed once more than 32 commits have
++ * been run, and after more than 128 the function gives up with %-ENOSPC.
++ */
++static int make_reservation(struct ubifs_info *c, int jhead, int len)
++{
++	int err, cmt_retries = 0, nospc_retries = 0;
++
++again:
++	down_read(&c->commit_sem);
++	err = reserve_space(c, jhead, len);
++	if (!err)
++		return 0; /* success: commit_sem is left read-locked for the caller */
++	up_read(&c->commit_sem);
++
++	if (err == -ENOSPC) {
++		/*
++		 * GC could not make any progress. We should try to commit
++		 * once because it could make some dirty space and GC would
++		 * make progress, so make the error -EAGAIN so that the below
++		 * will commit and re-try.
++		 */
++		if (nospc_retries++ < 2) {
++			dbg_jnl("no space, retry");
++			err = -EAGAIN;
++		}
++
++		/*
++		 * This means that the budgeting is incorrect. We always have
++		 * to be able to write to the media, because all operations are
++		 * budgeted. Deletions are not budgeted, though, but we reserve
++		 * an extra LEB for them.
++		 */
++	}
++
++	if (err != -EAGAIN)
++		goto out;
++
++	/*
++	 * -EAGAIN means that the journal is full or too large, or the above
++	 * code wants to do one commit. Do this and re-try.
++	 */
++	if (cmt_retries > 128) {
++		/*
++		 * This should not happen unless the journal size limitations
++		 * are too tough.
++		 */
++		ubifs_err("stuck in space allocation");
++		err = -ENOSPC;
++		goto out;
++	} else if (cmt_retries > 32)
++		ubifs_warn("too many space allocation re-tries (%d)",
++			   cmt_retries);
++
++	dbg_jnl("-EAGAIN, commit and retry (retried %d times)",
++		cmt_retries);
++	cmt_retries += 1;
++
++	err = ubifs_run_commit(c);
++	if (err)
++		return err;
++	goto again;
++
++out:
++	ubifs_err("cannot reserve %d bytes in jhead %d, error %d",
++		  len, jhead, err);
++	if (err == -ENOSPC) {
++		/* These are some budgeting problems, print useful information */
++		down_write(&c->commit_sem);
++		spin_lock(&c->space_lock);
++		dbg_dump_stack();
++		dbg_dump_budg(c);
++		spin_unlock(&c->space_lock);
++		dbg_dump_lprops(c);
++		cmt_retries = dbg_check_lprops(c); /* result is not used afterwards */
++		up_write(&c->commit_sem);
++	}
++	return err;
++}
++
++/**
++ * release_head - release a journal head.
++ * @c: UBIFS file-system description object
++ * @jhead: journal head
++ *
++ * This function releases journal head @jhead which was locked by
++ * the 'make_reservation()' function. It has to be called after each successful
++ * 'make_reservation()' invocation. It unlocks the @io_mutex of the head's
++ * write-buffer.
++ */
++static inline void release_head(struct ubifs_info *c, int jhead)
++{
++	mutex_unlock(&c->jheads[jhead].wbuf.io_mutex);
++}
++
++/**
++ * finish_reservation - finish a reservation.
++ * @c: UBIFS file-system description object
++ *
++ * This function finishes journal space reservation. It must be called after
++ * 'make_reservation()'.
++ */
++static void finish_reservation(struct ubifs_info *c)
++{
++	up_read(&c->commit_sem); /* pairs with the down_read() in make_reservation() */
++}
++
++/**
++ * get_dent_type - translate VFS inode mode to UBIFS directory entry type.
++ * @mode: inode mode
++ *
++ * Only the file type bits (@mode & %S_IFMT) are examined. Returns the
++ * matching UBIFS_ITYPE_* constant for regular files, directories, symlinks,
++ * block and character devices, FIFOs and sockets; any other file type hits
++ * 'BUG()'.
++ */
++static int get_dent_type(int mode)
++{
++	switch (mode & S_IFMT) {
++	case S_IFREG:
++		return UBIFS_ITYPE_REG;
++	case S_IFDIR:
++		return UBIFS_ITYPE_DIR;
++	case S_IFLNK:
++		return UBIFS_ITYPE_LNK;
++	case S_IFBLK:
++		return UBIFS_ITYPE_BLK;
++	case S_IFCHR:
++		return UBIFS_ITYPE_CHR;
++	case S_IFIFO:
++		return UBIFS_ITYPE_FIFO;
++	case S_IFSOCK:
++		return UBIFS_ITYPE_SOCK;
++	default:
++		BUG();
++	}
++	return 0;
++}
++
++/**
++ * pack_inode - pack an inode node.
++ * @c: UBIFS file-system description object
++ * @ino: buffer in which to pack inode node
++ * @inode: inode to pack
++ * @last: indicates the last node of the group
++ *
++ * Fills @ino from the VFS inode and its UBIFS part, converting every
++ * multi-byte field with the cpu_to_le16/32/64 helpers, zeroes the unused
++ * fields and finishes the node with 'ubifs_prep_grp_node()'. If @inode has
++ * no links left, the attached data is not copied and the node is prepared
++ * with a length of %UBIFS_INO_NODE_SZ only.
++ */
++static void pack_inode(struct ubifs_info *c, struct ubifs_ino_node *ino,
++		       const struct inode *inode, int last)
++{
++	int data_len = 0, last_reference = !inode->i_nlink;
++	struct ubifs_inode *ui = ubifs_inode(inode);
++
++	ino->ch.node_type = UBIFS_INO_NODE;
++	ino_key_init_flash(c, &ino->key, inode->i_ino);
++	ino->creat_sqnum = cpu_to_le64(ui->creat_sqnum);
++	ino->atime_sec = cpu_to_le64(inode->i_atime.tv_sec);
++	ino->atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
++	ino->ctime_sec = cpu_to_le64(inode->i_ctime.tv_sec);
++	ino->ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
++	ino->mtime_sec = cpu_to_le64(inode->i_mtime.tv_sec);
++	ino->mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
++	ino->uid = cpu_to_le32(inode->i_uid);
++	ino->gid = cpu_to_le32(inode->i_gid);
++	ino->mode = cpu_to_le32(inode->i_mode);
++	ino->flags = cpu_to_le32(ui->flags);
++	ino->size = cpu_to_le64(ui->ui_size);
++	ino->nlink = cpu_to_le32(inode->i_nlink);
++	ino->compr_type = cpu_to_le16(ui->compr_type);
++	ino->data_len = cpu_to_le32(ui->data_len);
++	ino->xattr_cnt = cpu_to_le32(ui->xattr_cnt);
++	ino->xattr_size = cpu_to_le32(ui->xattr_size);
++	ino->xattr_names = cpu_to_le32(ui->xattr_names);
++	zero_ino_node_unused(ino);
++
++	/*
++	 * Drop the attached data if this is a deletion inode, the data is not
++	 * needed anymore.
++	 */
++	if (!last_reference) {
++		memcpy(ino->data, ui->data, ui->data_len);
++		data_len = ui->data_len;
++	}
++
++	ubifs_prep_grp_node(c, ino, UBIFS_INO_NODE_SZ + data_len, last);
++}
++
++/**
++ * mark_inode_clean - mark UBIFS inode as clean.
++ * @c: UBIFS file-system description object
++ * @ui: UBIFS inode to mark as clean
++ *
++ * This helper function marks UBIFS inode @ui as clean by cleaning the
++ * @ui->dirty flag and releasing its budget. Note, VFS may still treat the
++ * inode as dirty and try to write it back, but 'ubifs_write_inode()' would
++ * just do nothing. The budget is released only if the inode was dirty.
++ */
++static void mark_inode_clean(struct ubifs_info *c, struct ubifs_inode *ui)
++{
++	if (ui->dirty)
++		ubifs_release_dirty_inode_budget(c, ui);
++	ui->dirty = 0;
++}
++
++/**
++ * ubifs_jnl_update - update inode.
++ * @c: UBIFS file-system description object
++ * @dir: parent inode or host inode in case of extended attributes
++ * @nm: directory entry name
++ * @inode: inode to update
++ * @deletion: indicates a directory entry deletion i.e unlink or rmdir
++ * @xent: non-zero if the directory entry is an extended attribute entry
++ *
++ * This function updates an inode by writing a directory entry (or extended
++ * attribute entry), the inode itself, and the parent directory inode (or the
++ * host inode) to the journal.
++ *
++ * The function writes the host inode @dir last, which is important in case of
++ * extended attributes. Indeed, then we guarantee that if the host inode gets
++ * synchronized (with 'fsync()'), and the write-buffer it sits in gets flushed,
++ * the extended attribute inode gets flushed too. And this is exactly what the
++ * user expects - synchronizing the host inode synchronizes its extended
++ * attributes.
Similarly, this guarantees that if @dir is synchronized, its ++ * directory entry corresponding to @nm gets synchronized too. ++ * ++ * If the inode (@inode) or the parent directory (@dir) are synchronous, this ++ * function synchronizes the write-buffer. ++ * ++ * This function marks the @dir and @inode inodes as clean and returns zero on ++ * success. In case of failure, a negative error code is returned. ++ */ ++int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, ++ const struct qstr *nm, const struct inode *inode, ++ int deletion, int xent) ++{ ++ int err, dlen, ilen, len, lnum, ino_offs, dent_offs; ++ int aligned_dlen, aligned_ilen, sync = IS_DIRSYNC(dir); ++ int last_reference = !!(deletion && inode->i_nlink == 0); ++ struct ubifs_inode *ui = ubifs_inode(inode); ++ struct ubifs_inode *dir_ui = ubifs_inode(dir); ++ struct ubifs_dent_node *dent; ++ struct ubifs_ino_node *ino; ++ union ubifs_key dent_key, ino_key; ++ ++ dbg_jnl("ino %lu, dent '%.*s', data len %d in dir ino %lu", ++ inode->i_ino, nm->len, nm->name, ui->data_len, dir->i_ino); ++ ubifs_assert(dir_ui->data_len == 0); ++ ubifs_assert(mutex_is_locked(&dir_ui->ui_mutex)); ++ ++ dlen = UBIFS_DENT_NODE_SZ + nm->len + 1; ++ ilen = UBIFS_INO_NODE_SZ; ++ ++ /* ++ * If the last reference to the inode is being deleted, then there is ++ * no need to attach and write inode data, it is being deleted anyway. ++ * And if the inode is being deleted, no need to synchronize ++ * write-buffer even if the inode is synchronous. 
++ */ ++ if (!last_reference) { ++ ilen += ui->data_len; ++ sync |= IS_SYNC(inode); ++ } ++ ++ aligned_dlen = ALIGN(dlen, 8); ++ aligned_ilen = ALIGN(ilen, 8); ++ len = aligned_dlen + aligned_ilen + UBIFS_INO_NODE_SZ; ++ dent = kmalloc(len, GFP_NOFS); ++ if (!dent) ++ return -ENOMEM; ++ ++ /* Make reservation before allocating sequence numbers */ ++ err = make_reservation(c, BASEHD, len); ++ if (err) ++ goto out_free; ++ ++ if (!xent) { ++ dent->ch.node_type = UBIFS_DENT_NODE; ++ dent_key_init(c, &dent_key, dir->i_ino, nm); ++ } else { ++ dent->ch.node_type = UBIFS_XENT_NODE; ++ xent_key_init(c, &dent_key, dir->i_ino, nm); ++ } ++ ++ key_write(c, &dent_key, dent->key); ++ dent->inum = deletion ? 0 : cpu_to_le64(inode->i_ino); ++ dent->type = get_dent_type(inode->i_mode); ++ dent->nlen = cpu_to_le16(nm->len); ++ memcpy(dent->name, nm->name, nm->len); ++ dent->name[nm->len] = '\0'; ++ zero_dent_node_unused(dent); ++ ubifs_prep_grp_node(c, dent, dlen, 0); ++ ++ ino = (void *)dent + aligned_dlen; ++ pack_inode(c, ino, inode, 0); ++ ino = (void *)ino + aligned_ilen; ++ pack_inode(c, ino, dir, 1); ++ ++ if (last_reference) { ++ err = ubifs_add_orphan(c, inode->i_ino); ++ if (err) { ++ release_head(c, BASEHD); ++ goto out_finish; ++ } ++ ui->del_cmtno = c->cmt_no; ++ } ++ ++ err = write_head(c, BASEHD, dent, len, &lnum, &dent_offs, sync); ++ if (err) ++ goto out_release; ++ if (!sync) { ++ struct ubifs_wbuf *wbuf = &c->jheads[BASEHD].wbuf; ++ ++ ubifs_wbuf_add_ino_nolock(wbuf, inode->i_ino); ++ ubifs_wbuf_add_ino_nolock(wbuf, dir->i_ino); ++ } ++ release_head(c, BASEHD); ++ kfree(dent); ++ ++ if (deletion) { ++ err = ubifs_tnc_remove_nm(c, &dent_key, nm); ++ if (err) ++ goto out_ro; ++ err = ubifs_add_dirt(c, lnum, dlen); ++ } else ++ err = ubifs_tnc_add_nm(c, &dent_key, lnum, dent_offs, dlen, nm); ++ if (err) ++ goto out_ro; ++ ++ /* ++ * Note, we do not remove the inode from TNC even if the last reference ++ * to it has just been deleted, because the inode may still be 
opened. ++ * Instead, the inode has been added to orphan lists and the orphan ++ * subsystem will take further care about it. ++ */ ++ ino_key_init(c, &ino_key, inode->i_ino); ++ ino_offs = dent_offs + aligned_dlen; ++ err = ubifs_tnc_add(c, &ino_key, lnum, ino_offs, ilen); ++ if (err) ++ goto out_ro; ++ ++ ino_key_init(c, &ino_key, dir->i_ino); ++ ino_offs += aligned_ilen; ++ err = ubifs_tnc_add(c, &ino_key, lnum, ino_offs, UBIFS_INO_NODE_SZ); ++ if (err) ++ goto out_ro; ++ ++ finish_reservation(c); ++ spin_lock(&ui->ui_lock); ++ ui->synced_i_size = ui->ui_size; ++ spin_unlock(&ui->ui_lock); ++ mark_inode_clean(c, ui); ++ mark_inode_clean(c, dir_ui); ++ return 0; ++ ++out_finish: ++ finish_reservation(c); ++out_free: ++ kfree(dent); ++ return err; ++ ++out_release: ++ release_head(c, BASEHD); ++out_ro: ++ ubifs_ro_mode(c, err); ++ if (last_reference) ++ ubifs_delete_orphan(c, inode->i_ino); ++ finish_reservation(c); ++ return err; ++} ++ ++/** ++ * ubifs_jnl_write_data - write a data node to the journal. ++ * @c: UBIFS file-system description object ++ * @inode: inode the data node belongs to ++ * @key: node key ++ * @buf: buffer to write ++ * @len: data length (must not exceed %UBIFS_BLOCK_SIZE) ++ * ++ * This function writes a data node to the journal. Returns %0 if the data node ++ * was successfully written, and a negative error code in case of failure. 
++ */
++int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
++			 const union ubifs_key *key, const void *buf, int len)
++{
++	struct ubifs_data_node *data;
++	int err, lnum, offs, compr_type, out_len;
++	int dlen = UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR; /* worst-case node size, used for the allocation */
++	struct ubifs_inode *ui = ubifs_inode(inode);
++
++	dbg_jnl("ino %lu, blk %u, len %d, key %s",
++		(unsigned long)key_inum(c, key), key_block(c, key), len,
++		DBGKEY(key));
++	ubifs_assert(len <= UBIFS_BLOCK_SIZE);
++
++	data = kmalloc(dlen, GFP_NOFS);
++	if (!data)
++		return -ENOMEM;
++
++	data->ch.node_type = UBIFS_DATA_NODE;
++	key_write(c, key, &data->key);
++	data->size = cpu_to_le32(len); /* the uncompressed length is recorded */
++	zero_data_node_unused(data);
++
++	if (!(ui->flags & UBIFS_COMPR_FL))
++		/* Compression is disabled for this inode */
++		compr_type = UBIFS_COMPR_NONE;
++	else
++		compr_type = ui->compr_type;
++
++	out_len = dlen - UBIFS_DATA_NODE_SZ; /* room available for the payload */
++	ubifs_compress(buf, len, &data->data, &out_len, &compr_type);
++	ubifs_assert(out_len <= UBIFS_BLOCK_SIZE);
++
++	dlen = UBIFS_DATA_NODE_SZ + out_len; /* size that is reserved and written */
++	data->compr_type = cpu_to_le16(compr_type);
++
++	/* Make reservation before allocating sequence numbers */
++	err = make_reservation(c, DATAHD, dlen);
++	if (err)
++		goto out_free;
++
++	err = write_node(c, DATAHD, data, dlen, &lnum, &offs);
++	if (err)
++		goto out_release;
++	ubifs_wbuf_add_ino_nolock(&c->jheads[DATAHD].wbuf, key_inum(c, key));
++	release_head(c, DATAHD);
++
++	err = ubifs_tnc_add(c, key, lnum, offs, dlen);
++	if (err)
++		goto out_ro;
++
++	finish_reservation(c);
++	kfree(data);
++	return 0;
++
++out_release: /* the write failed while the journal head was still locked */
++	release_head(c, DATAHD);
++out_ro:
++	ubifs_ro_mode(c, err);
++	finish_reservation(c);
++out_free: /* also reached directly when the reservation failed */
++	kfree(data);
++	return err;
++}
++
++/**
++ * ubifs_jnl_write_inode - flush inode to the journal.
++ * @c: UBIFS file-system description object
++ * @inode: inode to flush
++ *
++ * This function writes inode @inode to the journal.
If the inode is
++ * synchronous, it also synchronizes the write-buffer. Returns zero in case of
++ * success and a negative error code in case of failure.
++ */
++int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode)
++{
++	int err, lnum, offs;
++	struct ubifs_ino_node *ino;
++	struct ubifs_inode *ui = ubifs_inode(inode);
++	int sync = 0, len = UBIFS_INO_NODE_SZ, last_reference = !inode->i_nlink;
++
++	dbg_jnl("ino %lu, nlink %u", inode->i_ino, inode->i_nlink);
++
++	/*
++	 * If the inode is being deleted, do not write the attached data. No
++	 * need to synchronize the write-buffer either.
++	 */
++	if (!last_reference) {
++		len += ui->data_len;
++		sync = IS_SYNC(inode);
++	}
++	ino = kmalloc(len, GFP_NOFS);
++	if (!ino)
++		return -ENOMEM;
++
++	/* Make reservation before allocating sequence numbers */
++	err = make_reservation(c, BASEHD, len);
++	if (err)
++		goto out_free;
++
++	pack_inode(c, ino, inode, 1); /* a single node, so it is also the last of its group */
++	err = write_head(c, BASEHD, ino, len, &lnum, &offs, sync);
++	if (err)
++		goto out_release;
++	if (!sync)
++		ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf,
++					  inode->i_ino);
++	release_head(c, BASEHD);
++
++	if (last_reference) { /* deletion inode: drop it from the TNC and the orphans */
++		err = ubifs_tnc_remove_ino(c, inode->i_ino);
++		if (err)
++			goto out_ro;
++		ubifs_delete_orphan(c, inode->i_ino);
++		err = ubifs_add_dirt(c, lnum, len);
++	} else {
++		union ubifs_key key;
++
++		ino_key_init(c, &key, inode->i_ino);
++		err = ubifs_tnc_add(c, &key, lnum, offs, len);
++	}
++	if (err)
++		goto out_ro;
++
++	finish_reservation(c);
++	spin_lock(&ui->ui_lock);
++	ui->synced_i_size = ui->ui_size;
++	spin_unlock(&ui->ui_lock);
++	kfree(ino);
++	return 0;
++
++out_release: /* the write failed while the journal head was still locked */
++	release_head(c, BASEHD);
++out_ro:
++	ubifs_ro_mode(c, err);
++	finish_reservation(c);
++out_free: /* also reached directly when the reservation failed */
++	kfree(ino);
++	return err;
++}
++
++/**
++ * ubifs_jnl_delete_inode - delete an inode.
++ * @c: UBIFS file-system description object
++ * @inode: inode to delete
++ *
++ * This function deletes inode @inode which includes removing it from orphans,
++ * deleting it from TNC and, in some cases, writing a deletion inode to the
++ * journal.
++ *
++ * When regular file inodes are unlinked or a directory inode is removed, the
++ * 'ubifs_jnl_update()' function writes a corresponding deletion inode and
++ * direntry to the media, and adds the inode to orphans. After this, when the
++ * last reference to this inode has been dropped, this function is called. In
++ * general, it has to write one more deletion inode to the media, because if
++ * a commit happened between 'ubifs_jnl_update()' and
++ * 'ubifs_jnl_delete_inode()', the deletion inode is not in the journal
++ * anymore, and in fact it might not be on the flash anymore, because it might
++ * have been garbage-collected already. And for optimization reasons UBIFS does
++ * not read the orphan area if it has been unmounted cleanly, so it would have
++ * no indication in the journal that there is a deleted inode which has to be
++ * removed from TNC.
++ *
++ * However, if there was no commit between 'ubifs_jnl_update()' and
++ * 'ubifs_jnl_delete_inode()', then there is no need to write the deletion
++ * inode to the media for the second time. And this is quite a typical case.
++ *
++ * This function returns zero in case of success and a negative error code in
++ * case of failure.
++ */
++int ubifs_jnl_delete_inode(struct ubifs_info *c, const struct inode *inode)
++{
++	int err;
++	struct ubifs_inode *ui = ubifs_inode(inode);
++
++	ubifs_assert(inode->i_nlink == 0);
++
++	if (ui->del_cmtno != c->cmt_no)
++		/* A commit happened for sure */
++		return ubifs_jnl_write_inode(c, inode);
++
++	down_read(&c->commit_sem);
++	/*
++	 * Check commit number again, because the first test has been done
++	 * without @c->commit_sem, so a commit might have happened.
++	 */
++	if (ui->del_cmtno != c->cmt_no) {
++		up_read(&c->commit_sem);
++		return ubifs_jnl_write_inode(c, inode);
++	}
++
++	err = ubifs_tnc_remove_ino(c, inode->i_ino); /* no commit in between: nothing is written */
++	if (err)
++		ubifs_ro_mode(c, err);
++	else
++		ubifs_delete_orphan(c, inode->i_ino);
++	up_read(&c->commit_sem);
++	return err;
++}
++
++/**
++ * ubifs_jnl_rename - rename a directory entry.
++ * @c: UBIFS file-system description object
++ * @old_dir: parent inode of directory entry to rename
++ * @old_dentry: directory entry to rename
++ * @new_dir: parent inode of the new directory entry
++ * @new_dentry: new directory entry (or directory entry to replace)
++ * @sync: non-zero if the write-buffer has to be synchronized
++ *
++ * This function implements the re-name operation which may involve writing up
++ * to 3 inodes and 2 directory entries. It marks the written inodes as clean
++ * and returns zero on success. In case of failure, a negative error code is
++ * returned.
++ *
++ * The nodes are packed into one buffer in this order: the new directory
++ * entry, a deletion entry for the old name, the inode @new_dentry pointed to
++ * (only if there was one), @old_dir and, when the two directories differ,
++ * @new_dir. The renamed inode itself is not written. If the replaced inode
++ * has no links left it is added to the orphans before the write.
++ */
++int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
++		     const struct dentry *old_dentry,
++		     const struct inode *new_dir,
++		     const struct dentry *new_dentry, int sync)
++{
++	void *p;
++	union ubifs_key key;
++	struct ubifs_dent_node *dent, *dent2;
++	int err, dlen1, dlen2, ilen, lnum, offs, len;
++	const struct inode *old_inode = old_dentry->d_inode;
++	const struct inode *new_inode = new_dentry->d_inode;
++	int aligned_dlen1, aligned_dlen2, plen = UBIFS_INO_NODE_SZ;
++	int last_reference = !!(new_inode && new_inode->i_nlink == 0);
++	int move = (old_dir != new_dir);
++	struct ubifs_inode *uninitialized_var(new_ui);
++
++	dbg_jnl("dent '%.*s' in dir ino %lu to dent '%.*s' in dir ino %lu",
++		old_dentry->d_name.len, old_dentry->d_name.name,
++		old_dir->i_ino, new_dentry->d_name.len,
++		new_dentry->d_name.name, new_dir->i_ino);
++	ubifs_assert(ubifs_inode(old_dir)->data_len == 0);
++	ubifs_assert(ubifs_inode(new_dir)->data_len == 0);
++	ubifs_assert(mutex_is_locked(&ubifs_inode(old_dir)->ui_mutex));
++	ubifs_assert(mutex_is_locked(&ubifs_inode(new_dir)->ui_mutex));
++
++	dlen1 = UBIFS_DENT_NODE_SZ + new_dentry->d_name.len + 1;
++	dlen2 = UBIFS_DENT_NODE_SZ + old_dentry->d_name.len + 1;
++	if (new_inode) {
++		new_ui = ubifs_inode(new_inode);
++		ubifs_assert(mutex_is_locked(&new_ui->ui_mutex));
++		ilen = UBIFS_INO_NODE_SZ;
++		if (!last_reference)
++			ilen += new_ui->data_len;
++	} else
++		ilen = 0;
++
++	aligned_dlen1 = ALIGN(dlen1, 8);
++	aligned_dlen2 = ALIGN(dlen2, 8);
++	len = aligned_dlen1 + aligned_dlen2 + ALIGN(ilen, 8) + ALIGN(plen, 8);
++	if (old_dir != new_dir)
++		len += plen;
++	dent = kmalloc(len, GFP_NOFS);
++	if (!dent)
++		return -ENOMEM;
++
++	/* Make reservation before allocating sequence numbers */
++	err = make_reservation(c, BASEHD, len);
++	if (err)
++		goto out_free;
++
++	/* Make new dent */
++	dent->ch.node_type = UBIFS_DENT_NODE;
++	dent_key_init_flash(c, &dent->key, new_dir->i_ino, &new_dentry->d_name);
++	dent->inum = cpu_to_le64(old_inode->i_ino);
++	dent->type = get_dent_type(old_inode->i_mode);
++	dent->nlen = cpu_to_le16(new_dentry->d_name.len);
++	memcpy(dent->name, new_dentry->d_name.name, new_dentry->d_name.len);
++	dent->name[new_dentry->d_name.len] = '\0';
++	zero_dent_node_unused(dent);
++	ubifs_prep_grp_node(c, dent, dlen1, 0);
++
++	/* Make deletion dent */
++	dent2 = (void *)dent + aligned_dlen1;
++	dent2->ch.node_type = UBIFS_DENT_NODE;
++	dent_key_init_flash(c, &dent2->key, old_dir->i_ino,
++			    &old_dentry->d_name);
++	dent2->inum = 0;
++	dent2->type = DT_UNKNOWN;
++	dent2->nlen = cpu_to_le16(old_dentry->d_name.len);
++	memcpy(dent2->name, old_dentry->d_name.name, old_dentry->d_name.len);
++	dent2->name[old_dentry->d_name.len] = '\0';
++	zero_dent_node_unused(dent2);
++	ubifs_prep_grp_node(c, dent2, dlen2, 0);
++
++	p = (void *)dent2 + aligned_dlen2;
++	if (new_inode) {
++		pack_inode(c, p, new_inode, 0);
++		p += ALIGN(ilen, 8);
++	}
++
++	if (!move)
++		pack_inode(c, p, old_dir, 1);
++	else {
++		pack_inode(c, p, old_dir, 0);
++		p += ALIGN(plen, 8);
++		pack_inode(c, p, new_dir, 1);
++	}
++
++	if (last_reference) {
++		err = ubifs_add_orphan(c, new_inode->i_ino);
++		if (err) {
++			release_head(c, BASEHD);
++			goto out_finish;
++		}
++		new_ui->del_cmtno = c->cmt_no;
++	}
++
++	err = write_head(c, BASEHD, dent, len, &lnum, &offs, sync);
++	if (err)
++		goto out_release;
++	if (!sync) {
++		struct ubifs_wbuf *wbuf = &c->jheads[BASEHD].wbuf;
++
++		ubifs_wbuf_add_ino_nolock(wbuf, new_dir->i_ino);
++		ubifs_wbuf_add_ino_nolock(wbuf, old_dir->i_ino);
++		if (new_inode)
++			ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf,
++						  new_inode->i_ino);
++	}
++	release_head(c, BASEHD);
++
++	dent_key_init(c, &key, new_dir->i_ino, &new_dentry->d_name);
++	err = ubifs_tnc_add_nm(c, &key, lnum, offs, dlen1, &new_dentry->d_name);
++	if (err)
++		goto out_ro;
++
++	err = ubifs_add_dirt(c, lnum, dlen2);
++	if (err)
++		goto out_ro;
++
++	dent_key_init(c, &key, old_dir->i_ino, &old_dentry->d_name);
++	err = ubifs_tnc_remove_nm(c, &key, &old_dentry->d_name);
++	if (err)
++		goto out_ro;
++
++	offs += aligned_dlen1 + aligned_dlen2; /* skip both directory entries */
++	if (new_inode) {
++		ino_key_init(c, &key, new_inode->i_ino);
++		err = ubifs_tnc_add(c, &key, lnum, offs, ilen);
++		if (err)
++			goto out_ro;
++		offs += ALIGN(ilen, 8);
++	}
++
++	ino_key_init(c, &key, old_dir->i_ino);
++	err = ubifs_tnc_add(c, &key, lnum, offs, plen);
++	if (err)
++		goto out_ro;
++
++	if (old_dir != new_dir) {
++		offs += ALIGN(plen, 8);
++		ino_key_init(c, &key, new_dir->i_ino);
++		err = ubifs_tnc_add(c, &key, lnum, offs, plen);
++		if (err)
++			goto out_ro;
++	}
++
++	finish_reservation(c);
++	if (new_inode) {
++		mark_inode_clean(c, new_ui);
++		spin_lock(&new_ui->ui_lock);
++		new_ui->synced_i_size = new_ui->ui_size;
++		spin_unlock(&new_ui->ui_lock);
++	}
++	mark_inode_clean(c, ubifs_inode(old_dir));
++	if (move)
++		mark_inode_clean(c, ubifs_inode(new_dir));
++	kfree(dent);
++	return 0;
++
++out_release:
++	release_head(c, BASEHD);
++out_ro:
++	ubifs_ro_mode(c, err);
++	if (last_reference)
++		ubifs_delete_orphan(c, new_inode->i_ino);
++out_finish:
++	finish_reservation(c);
++out_free: /* every error path ends here, so the buffer is always freed */
++	kfree(dent);
++	return err;
++}
++
++/**
++ * recomp_data_node - re-compress a truncated data node.
++ * @dn: data node to re-compress
++ * @new_len: on entry the new (uncompressed) data length; on success it is
++ *           replaced by the new node length, %UBIFS_DATA_NODE_SZ plus the
++ *           compressed payload length
++ *
++ * This function is used when an inode is truncated and the last data node of
++ * the inode has to be re-compressed and re-written. The payload of @dn is
++ * decompressed into a temporary buffer and its first @new_len bytes are
++ * compressed back into @dn, whose size and compression type fields are
++ * updated. Returns zero on success, %-ENOMEM if the temporary buffer cannot
++ * be allocated, or the error returned by 'ubifs_decompress()'.
++ */
++static int recomp_data_node(struct ubifs_data_node *dn, int *new_len)
++{
++	void *buf;
++	int err, len, compr_type, out_len;
++
++	out_len = le32_to_cpu(dn->size);
++	buf = kmalloc(out_len * WORST_COMPR_FACTOR, GFP_NOFS);
++	if (!buf)
++		return -ENOMEM;
++
++	len = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ;
++	compr_type = le16_to_cpu(dn->compr_type);
++	err = ubifs_decompress(&dn->data, len, buf, &out_len, compr_type);
++	if (err)
++		goto out;
++
++	ubifs_compress(buf, *new_len, &dn->data, &out_len, &compr_type);
++	ubifs_assert(out_len <= UBIFS_BLOCK_SIZE);
++	dn->compr_type = cpu_to_le16(compr_type);
++	dn->size = cpu_to_le32(*new_len);
++	*new_len = UBIFS_DATA_NODE_SZ + out_len;
++out:
++	kfree(buf);
++	return err;
++}
++
++/**
++ * ubifs_jnl_truncate - update the journal for a truncation.
++ * @c: UBIFS file-system description object
++ * @inode: inode to truncate
++ * @old_size: old size
++ * @new_size: new size
++ *
++ * When the size of a file decreases due to truncation, a truncation node is
++ * written, the journal tree is updated, and the last data block is re-written
++ * if it has been affected. The inode is also updated in order to synchronize
++ * the new inode size.
++ *
++ * This function marks the inode as clean and returns zero on success. In case
++ * of failure, a negative error code is returned.
++ */ ++int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode, ++ loff_t old_size, loff_t new_size) ++{ ++ union ubifs_key key, to_key; ++ struct ubifs_ino_node *ino; ++ struct ubifs_trun_node *trun; ++ struct ubifs_data_node *uninitialized_var(dn); ++ int err, dlen, len, lnum, offs, bit, sz, sync = IS_SYNC(inode); ++ struct ubifs_inode *ui = ubifs_inode(inode); ++ ino_t inum = inode->i_ino; ++ unsigned int blk; ++ ++ dbg_jnl("ino %lu, size %lld -> %lld", ++ (unsigned long)inum, old_size, new_size); ++ ubifs_assert(!ui->data_len); ++ ubifs_assert(S_ISREG(inode->i_mode)); ++ ubifs_assert(mutex_is_locked(&ui->ui_mutex)); ++ ++ sz = UBIFS_TRUN_NODE_SZ + UBIFS_INO_NODE_SZ + ++ UBIFS_MAX_DATA_NODE_SZ * WORST_COMPR_FACTOR; ++ ino = kmalloc(sz, GFP_NOFS); ++ if (!ino) ++ return -ENOMEM; ++ ++ trun = (void *)ino + UBIFS_INO_NODE_SZ; ++ trun->ch.node_type = UBIFS_TRUN_NODE; ++ trun->inum = cpu_to_le32(inum); ++ trun->old_size = cpu_to_le64(old_size); ++ trun->new_size = cpu_to_le64(new_size); ++ zero_trun_node_unused(trun); ++ ++ dlen = new_size & (UBIFS_BLOCK_SIZE - 1); ++ if (dlen) { ++ /* Get last data block so it can be truncated */ ++ dn = (void *)trun + UBIFS_TRUN_NODE_SZ; ++ blk = new_size >> UBIFS_BLOCK_SHIFT; ++ data_key_init(c, &key, inum, blk); ++ dbg_jnl("last block key %s", DBGKEY(&key)); ++ err = ubifs_tnc_lookup(c, &key, dn); ++ if (err == -ENOENT) ++ dlen = 0; /* Not found (so it is a hole) */ ++ else if (err) ++ goto out_free; ++ else { ++ if (le32_to_cpu(dn->size) <= dlen) ++ dlen = 0; /* Nothing to do */ ++ else { ++ int compr_type = le16_to_cpu(dn->compr_type); ++ ++ if (compr_type != UBIFS_COMPR_NONE) { ++ err = recomp_data_node(dn, &dlen); ++ if (err) ++ goto out_free; ++ } else { ++ dn->size = cpu_to_le32(dlen); ++ dlen += UBIFS_DATA_NODE_SZ; ++ } ++ zero_data_node_unused(dn); ++ } ++ } ++ } ++ ++ /* Must make reservation before allocating sequence numbers */ ++ len = UBIFS_TRUN_NODE_SZ + UBIFS_INO_NODE_SZ; ++ if (dlen) ++ len += 
dlen; ++ err = make_reservation(c, BASEHD, len); ++ if (err) ++ goto out_free; ++ ++ pack_inode(c, ino, inode, 0); ++ ubifs_prep_grp_node(c, trun, UBIFS_TRUN_NODE_SZ, dlen ? 0 : 1); ++ if (dlen) ++ ubifs_prep_grp_node(c, dn, dlen, 1); ++ ++ err = write_head(c, BASEHD, ino, len, &lnum, &offs, sync); ++ if (err) ++ goto out_release; ++ if (!sync) ++ ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf, inum); ++ release_head(c, BASEHD); ++ ++ if (dlen) { ++ sz = offs + UBIFS_INO_NODE_SZ + UBIFS_TRUN_NODE_SZ; ++ err = ubifs_tnc_add(c, &key, lnum, sz, dlen); ++ if (err) ++ goto out_ro; ++ } ++ ++ ino_key_init(c, &key, inum); ++ err = ubifs_tnc_add(c, &key, lnum, offs, UBIFS_INO_NODE_SZ); ++ if (err) ++ goto out_ro; ++ ++ err = ubifs_add_dirt(c, lnum, UBIFS_TRUN_NODE_SZ); ++ if (err) ++ goto out_ro; ++ ++ bit = new_size & (UBIFS_BLOCK_SIZE - 1); ++ blk = (new_size >> UBIFS_BLOCK_SHIFT) + (bit ? 1 : 0); ++ data_key_init(c, &key, inum, blk); ++ ++ bit = old_size & (UBIFS_BLOCK_SIZE - 1); ++ blk = (old_size >> UBIFS_BLOCK_SHIFT) - (bit ? 0 : 1); ++ data_key_init(c, &to_key, inum, blk); ++ ++ err = ubifs_tnc_remove_range(c, &key, &to_key); ++ if (err) ++ goto out_ro; ++ ++ finish_reservation(c); ++ spin_lock(&ui->ui_lock); ++ ui->synced_i_size = ui->ui_size; ++ spin_unlock(&ui->ui_lock); ++ mark_inode_clean(c, ui); ++ kfree(ino); ++ return 0; ++ ++out_release: ++ release_head(c, BASEHD); ++out_ro: ++ ubifs_ro_mode(c, err); ++ finish_reservation(c); ++out_free: ++ kfree(ino); ++ return err; ++} ++ ++#ifdef CONFIG_UBIFS_FS_XATTR ++ ++/** ++ * ubifs_jnl_delete_xattr - delete an extended attribute. ++ * @c: UBIFS file-system description object ++ * @host: host inode ++ * @inode: extended attribute inode ++ * @nm: extended attribute entry name ++ * ++ * This function delete an extended attribute which is very similar to ++ * un-linking regular files - it writes a deletion xentry, a deletion inode and ++ * updates the target inode. 
Returns zero in case of success and a negative ++ * error code in case of failure. ++ */ ++int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host, ++ const struct inode *inode, const struct qstr *nm) ++{ ++ int err, xlen, hlen, len, lnum, xent_offs, aligned_xlen; ++ struct ubifs_dent_node *xent; ++ struct ubifs_ino_node *ino; ++ union ubifs_key xent_key, key1, key2; ++ int sync = IS_DIRSYNC(host); ++ struct ubifs_inode *host_ui = ubifs_inode(host); ++ ++ dbg_jnl("host %lu, xattr ino %lu, name '%s', data len %d", ++ host->i_ino, inode->i_ino, nm->name, ++ ubifs_inode(inode)->data_len); ++ ubifs_assert(inode->i_nlink == 0); ++ ubifs_assert(mutex_is_locked(&host_ui->ui_mutex)); ++ ++ /* ++ * Since we are deleting the inode, we do not bother to attach any data ++ * to it and assume its length is %UBIFS_INO_NODE_SZ. ++ */ ++ xlen = UBIFS_DENT_NODE_SZ + nm->len + 1; ++ aligned_xlen = ALIGN(xlen, 8); ++ hlen = host_ui->data_len + UBIFS_INO_NODE_SZ; ++ len = aligned_xlen + UBIFS_INO_NODE_SZ + ALIGN(hlen, 8); ++ ++ xent = kmalloc(len, GFP_NOFS); ++ if (!xent) ++ return -ENOMEM; ++ ++ /* Make reservation before allocating sequence numbers */ ++ err = make_reservation(c, BASEHD, len); ++ if (err) { ++ kfree(xent); ++ return err; ++ } ++ ++ xent->ch.node_type = UBIFS_XENT_NODE; ++ xent_key_init(c, &xent_key, host->i_ino, nm); ++ key_write(c, &xent_key, xent->key); ++ xent->inum = 0; ++ xent->type = get_dent_type(inode->i_mode); ++ xent->nlen = cpu_to_le16(nm->len); ++ memcpy(xent->name, nm->name, nm->len); ++ xent->name[nm->len] = '\0'; ++ zero_dent_node_unused(xent); ++ ubifs_prep_grp_node(c, xent, xlen, 0); ++ ++ ino = (void *)xent + aligned_xlen; ++ pack_inode(c, ino, inode, 0); ++ ino = (void *)ino + UBIFS_INO_NODE_SZ; ++ pack_inode(c, ino, host, 1); ++ ++ err = write_head(c, BASEHD, xent, len, &lnum, &xent_offs, sync); ++ if (!sync && !err) ++ ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf, host->i_ino); ++ release_head(c, BASEHD); ++ kfree(xent); ++ 
if (err) ++ goto out_ro; ++ ++ /* Remove the extended attribute entry from TNC */ ++ err = ubifs_tnc_remove_nm(c, &xent_key, nm); ++ if (err) ++ goto out_ro; ++ err = ubifs_add_dirt(c, lnum, xlen); ++ if (err) ++ goto out_ro; ++ ++ /* ++ * Remove all nodes belonging to the extended attribute inode from TNC. ++ * Well, there actually must be only one node - the inode itself. ++ */ ++ lowest_ino_key(c, &key1, inode->i_ino); ++ highest_ino_key(c, &key2, inode->i_ino); ++ err = ubifs_tnc_remove_range(c, &key1, &key2); ++ if (err) ++ goto out_ro; ++ err = ubifs_add_dirt(c, lnum, UBIFS_INO_NODE_SZ); ++ if (err) ++ goto out_ro; ++ ++ /* And update TNC with the new host inode position */ ++ ino_key_init(c, &key1, host->i_ino); ++ err = ubifs_tnc_add(c, &key1, lnum, xent_offs + len - hlen, hlen); ++ if (err) ++ goto out_ro; ++ ++ finish_reservation(c); ++ spin_lock(&host_ui->ui_lock); ++ host_ui->synced_i_size = host_ui->ui_size; ++ spin_unlock(&host_ui->ui_lock); ++ mark_inode_clean(c, host_ui); ++ return 0; ++ ++out_ro: ++ ubifs_ro_mode(c, err); ++ finish_reservation(c); ++ return err; ++} ++ ++/** ++ * ubifs_jnl_change_xattr - change an extended attribute. ++ * @c: UBIFS file-system description object ++ * @inode: extended attribute inode ++ * @host: host inode ++ * ++ * This function writes the updated version of an extended attribute inode and ++ * the host inode to the journal (to the base head). The host inode is written ++ * after the extended attribute inode in order to guarantee that the extended ++ * attribute will be flushed when the inode is synchronized by 'fsync()' and ++ * consequently, the write-buffer is synchronized. This function returns zero ++ * in case of success and a negative error code in case of failure. 
++ */ ++int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode, ++ const struct inode *host) ++{ ++ int err, len1, len2, aligned_len, aligned_len1, lnum, offs; ++ struct ubifs_inode *host_ui = ubifs_inode(host); ++ struct ubifs_ino_node *ino; ++ union ubifs_key key; ++ int sync = IS_DIRSYNC(host); ++ ++ dbg_jnl("ino %lu, ino %lu", host->i_ino, inode->i_ino); ++ ubifs_assert(host->i_nlink > 0); ++ ubifs_assert(inode->i_nlink > 0); ++ ubifs_assert(mutex_is_locked(&host_ui->ui_mutex)); ++ ++ len1 = UBIFS_INO_NODE_SZ + host_ui->data_len; ++ len2 = UBIFS_INO_NODE_SZ + ubifs_inode(inode)->data_len; ++ aligned_len1 = ALIGN(len1, 8); ++ aligned_len = aligned_len1 + ALIGN(len2, 8); ++ ++ ino = kmalloc(aligned_len, GFP_NOFS); ++ if (!ino) ++ return -ENOMEM; ++ ++ /* Make reservation before allocating sequence numbers */ ++ err = make_reservation(c, BASEHD, aligned_len); ++ if (err) ++ goto out_free; ++ ++ pack_inode(c, ino, host, 0); ++ pack_inode(c, (void *)ino + aligned_len1, inode, 1); ++ ++ err = write_head(c, BASEHD, ino, aligned_len, &lnum, &offs, 0); ++ if (!sync && !err) { ++ struct ubifs_wbuf *wbuf = &c->jheads[BASEHD].wbuf; ++ ++ ubifs_wbuf_add_ino_nolock(wbuf, host->i_ino); ++ ubifs_wbuf_add_ino_nolock(wbuf, inode->i_ino); ++ } ++ release_head(c, BASEHD); ++ if (err) ++ goto out_ro; ++ ++ ino_key_init(c, &key, host->i_ino); ++ err = ubifs_tnc_add(c, &key, lnum, offs, len1); ++ if (err) ++ goto out_ro; ++ ++ ino_key_init(c, &key, inode->i_ino); ++ err = ubifs_tnc_add(c, &key, lnum, offs + aligned_len1, len2); ++ if (err) ++ goto out_ro; ++ ++ finish_reservation(c); ++ spin_lock(&host_ui->ui_lock); ++ host_ui->synced_i_size = host_ui->ui_size; ++ spin_unlock(&host_ui->ui_lock); ++ mark_inode_clean(c, host_ui); ++ kfree(ino); ++ return 0; ++ ++out_ro: ++ ubifs_ro_mode(c, err); ++ finish_reservation(c); ++out_free: ++ kfree(ino); ++ return err; ++} ++ ++#endif /* CONFIG_UBIFS_FS_XATTR */ +diff -Nurd linux-2.6.24/fs/ubifs/key.h 
ubifs-v2.6.24/fs/ubifs/key.h +--- linux-2.6.24/fs/ubifs/key.h 1970-01-01 02:00:00.000000000 +0200 ++++ ubifs-v2.6.24/fs/ubifs/key.h 2009-04-07 17:14:47.000000000 +0200 +@@ -0,0 +1,557 @@ ++/* ++ * This file is part of UBIFS. ++ * ++ * Copyright (C) 2006-2008 Nokia Corporation. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published by ++ * the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, write to the Free Software Foundation, Inc., 51 ++ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ * ++ * Authors: Artem Bityutskiy (Битюцкий Артём) ++ * Adrian Hunter ++ */ ++ ++/* ++ * This header contains various key-related definitions and helper function. ++ * UBIFS allows several key schemes, so we access key fields only via these ++ * helpers. At the moment only one key scheme is supported. ++ * ++ * Simple key scheme ++ * ~~~~~~~~~~~~~~~~~ ++ * ++ * Keys are 64-bits long. First 32-bits are inode number (parent inode number ++ * in case of direntry key). Next 3 bits are node type. The last 29 bits are ++ * 4KiB offset in case of inode node, and direntry hash in case of a direntry ++ * node. We use "r5" hash borrowed from reiserfs. ++ */ ++ ++#ifndef __UBIFS_KEY_H__ ++#define __UBIFS_KEY_H__ ++ ++/** ++ * key_mask_hash - mask a valid hash value. ++ * @val: value to be masked ++ * ++ * We use hash values as offset in directories, so values %0 and %1 are ++ * reserved for "." and "..". %2 is reserved for "end of readdir" marker. This ++ * function makes sure the reserved values are not used. 
++ */ ++static inline uint32_t key_mask_hash(uint32_t hash) ++{ ++ hash &= UBIFS_S_KEY_HASH_MASK; ++ if (unlikely(hash <= 2)) ++ hash += 3; ++ return hash; ++} ++ ++/** ++ * key_r5_hash - R5 hash function (borrowed from reiserfs). ++ * @s: direntry name ++ * @len: name length ++ */ ++static inline uint32_t key_r5_hash(const char *s, int len) ++{ ++ uint32_t a = 0; ++ const signed char *str = (const signed char *)s; ++ ++ while (*str) { ++ a += *str << 4; ++ a += *str >> 4; ++ a *= 11; ++ str++; ++ } ++ ++ return key_mask_hash(a); ++} ++ ++/** ++ * key_test_hash - testing hash function. ++ * @str: direntry name ++ * @len: name length ++ */ ++static inline uint32_t key_test_hash(const char *str, int len) ++{ ++ uint32_t a = 0; ++ ++ len = min_t(uint32_t, len, 4); ++ memcpy(&a, str, len); ++ return key_mask_hash(a); ++} ++ ++/** ++ * ino_key_init - initialize inode key. ++ * @c: UBIFS file-system description object ++ * @key: key to initialize ++ * @inum: inode number ++ */ ++static inline void ino_key_init(const struct ubifs_info *c, ++ union ubifs_key *key, ino_t inum) ++{ ++ key->u32[0] = inum; ++ key->u32[1] = UBIFS_INO_KEY << UBIFS_S_KEY_BLOCK_BITS; ++} ++ ++/** ++ * ino_key_init_flash - initialize on-flash inode key. ++ * @c: UBIFS file-system description object ++ * @k: key to initialize ++ * @inum: inode number ++ */ ++static inline void ino_key_init_flash(const struct ubifs_info *c, void *k, ++ ino_t inum) ++{ ++ union ubifs_key *key = k; ++ ++ key->j32[0] = cpu_to_le32(inum); ++ key->j32[1] = cpu_to_le32(UBIFS_INO_KEY << UBIFS_S_KEY_BLOCK_BITS); ++ memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8); ++} ++ ++/** ++ * lowest_ino_key - get the lowest possible inode key. 
++ * @c: UBIFS file-system description object ++ * @key: key to initialize ++ * @inum: inode number ++ */ ++static inline void lowest_ino_key(const struct ubifs_info *c, ++ union ubifs_key *key, ino_t inum) ++{ ++ key->u32[0] = inum; ++ key->u32[1] = 0; ++} ++ ++/** ++ * highest_ino_key - get the highest possible inode key. ++ * @c: UBIFS file-system description object ++ * @key: key to initialize ++ * @inum: inode number ++ */ ++static inline void highest_ino_key(const struct ubifs_info *c, ++ union ubifs_key *key, ino_t inum) ++{ ++ key->u32[0] = inum; ++ key->u32[1] = 0xffffffff; ++} ++ ++/** ++ * dent_key_init - initialize directory entry key. ++ * @c: UBIFS file-system description object ++ * @key: key to initialize ++ * @inum: parent inode number ++ * @nm: direntry name and length ++ */ ++static inline void dent_key_init(const struct ubifs_info *c, ++ union ubifs_key *key, ino_t inum, ++ const struct qstr *nm) ++{ ++ uint32_t hash = c->key_hash(nm->name, nm->len); ++ ++ ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); ++ key->u32[0] = inum; ++ key->u32[1] = hash | (UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS); ++} ++ ++/** ++ * dent_key_init_hash - initialize directory entry key without re-calculating ++ * hash function. ++ * @c: UBIFS file-system description object ++ * @key: key to initialize ++ * @inum: parent inode number ++ * @hash: direntry name hash ++ */ ++static inline void dent_key_init_hash(const struct ubifs_info *c, ++ union ubifs_key *key, ino_t inum, ++ uint32_t hash) ++{ ++ ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); ++ key->u32[0] = inum; ++ key->u32[1] = hash | (UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS); ++} ++ ++/** ++ * dent_key_init_flash - initialize on-flash directory entry key. 
++ * @c: UBIFS file-system description object ++ * @k: key to initialize ++ * @inum: parent inode number ++ * @nm: direntry name and length ++ */ ++static inline void dent_key_init_flash(const struct ubifs_info *c, void *k, ++ ino_t inum, const struct qstr *nm) ++{ ++ union ubifs_key *key = k; ++ uint32_t hash = c->key_hash(nm->name, nm->len); ++ ++ ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); ++ key->j32[0] = cpu_to_le32(inum); ++ key->j32[1] = cpu_to_le32(hash | ++ (UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS)); ++ memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8); ++} ++ ++/** ++ * lowest_dent_key - get the lowest possible directory entry key. ++ * @c: UBIFS file-system description object ++ * @key: where to store the lowest key ++ * @inum: parent inode number ++ */ ++static inline void lowest_dent_key(const struct ubifs_info *c, ++ union ubifs_key *key, ino_t inum) ++{ ++ key->u32[0] = inum; ++ key->u32[1] = UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS; ++} ++ ++/** ++ * xent_key_init - initialize extended attribute entry key. ++ * @c: UBIFS file-system description object ++ * @key: key to initialize ++ * @inum: host inode number ++ * @nm: extended attribute entry name and length ++ */ ++static inline void xent_key_init(const struct ubifs_info *c, ++ union ubifs_key *key, ino_t inum, ++ const struct qstr *nm) ++{ ++ uint32_t hash = c->key_hash(nm->name, nm->len); ++ ++ ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); ++ key->u32[0] = inum; ++ key->u32[1] = hash | (UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS); ++} ++ ++/** ++ * xent_key_init_hash - initialize extended attribute entry key without ++ * re-calculating hash function. 
++ * @c: UBIFS file-system description object ++ * @key: key to initialize ++ * @inum: host inode number ++ * @hash: extended attribute entry name hash ++ */ ++static inline void xent_key_init_hash(const struct ubifs_info *c, ++ union ubifs_key *key, ino_t inum, ++ uint32_t hash) ++{ ++ ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); ++ key->u32[0] = inum; ++ key->u32[1] = hash | (UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS); ++} ++ ++/** ++ * xent_key_init_flash - initialize on-flash extended attribute entry key. ++ * @c: UBIFS file-system description object ++ * @k: key to initialize ++ * @inum: host inode number ++ * @nm: extended attribute entry name and length ++ */ ++static inline void xent_key_init_flash(const struct ubifs_info *c, void *k, ++ ino_t inum, const struct qstr *nm) ++{ ++ union ubifs_key *key = k; ++ uint32_t hash = c->key_hash(nm->name, nm->len); ++ ++ ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); ++ key->j32[0] = cpu_to_le32(inum); ++ key->j32[1] = cpu_to_le32(hash | ++ (UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS)); ++ memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8); ++} ++ ++/** ++ * lowest_xent_key - get the lowest possible extended attribute entry key. ++ * @c: UBIFS file-system description object ++ * @key: where to store the lowest key ++ * @inum: host inode number ++ */ ++static inline void lowest_xent_key(const struct ubifs_info *c, ++ union ubifs_key *key, ino_t inum) ++{ ++ key->u32[0] = inum; ++ key->u32[1] = UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS; ++} ++ ++/** ++ * data_key_init - initialize data key. 
++ * @c: UBIFS file-system description object ++ * @key: key to initialize ++ * @inum: inode number ++ * @block: block number ++ */ ++static inline void data_key_init(const struct ubifs_info *c, ++ union ubifs_key *key, ino_t inum, ++ unsigned int block) ++{ ++ ubifs_assert(!(block & ~UBIFS_S_KEY_BLOCK_MASK)); ++ key->u32[0] = inum; ++ key->u32[1] = block | (UBIFS_DATA_KEY << UBIFS_S_KEY_BLOCK_BITS); ++} ++ ++/** ++ * data_key_init_flash - initialize on-flash data key. ++ * @c: UBIFS file-system description object ++ * @k: key to initialize ++ * @inum: inode number ++ * @block: block number ++ */ ++static inline void data_key_init_flash(const struct ubifs_info *c, void *k, ++ ino_t inum, unsigned int block) ++{ ++ union ubifs_key *key = k; ++ ++ ubifs_assert(!(block & ~UBIFS_S_KEY_BLOCK_MASK)); ++ key->j32[0] = cpu_to_le32(inum); ++ key->j32[1] = cpu_to_le32(block | ++ (UBIFS_DATA_KEY << UBIFS_S_KEY_BLOCK_BITS)); ++ memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8); ++} ++ ++/** ++ * trun_key_init - initialize truncation node key. ++ * @c: UBIFS file-system description object ++ * @key: key to initialize ++ * @inum: inode number ++ * ++ * Note, UBIFS does not have truncation keys on the media and this function is ++ * only used for purposes of replay. ++ */ ++static inline void trun_key_init(const struct ubifs_info *c, ++ union ubifs_key *key, ino_t inum) ++{ ++ key->u32[0] = inum; ++ key->u32[1] = UBIFS_TRUN_KEY << UBIFS_S_KEY_BLOCK_BITS; ++} ++ ++/** ++ * key_type - get key type. ++ * @c: UBIFS file-system description object ++ * @key: key to get type of ++ */ ++static inline int key_type(const struct ubifs_info *c, ++ const union ubifs_key *key) ++{ ++ return key->u32[1] >> UBIFS_S_KEY_BLOCK_BITS; ++} ++ ++/** ++ * key_type_flash - get type of a on-flash formatted key. 
++ * @c: UBIFS file-system description object ++ * @k: key to get type of ++ */ ++static inline int key_type_flash(const struct ubifs_info *c, const void *k) ++{ ++ const union ubifs_key *key = k; ++ ++ return le32_to_cpu(key->j32[1]) >> UBIFS_S_KEY_BLOCK_BITS; ++} ++ ++/** ++ * key_inum - fetch inode number from key. ++ * @c: UBIFS file-system description object ++ * @k: key to fetch inode number from ++ */ ++static inline ino_t key_inum(const struct ubifs_info *c, const void *k) ++{ ++ const union ubifs_key *key = k; ++ ++ return key->u32[0]; ++} ++ ++/** ++ * key_inum_flash - fetch inode number from an on-flash formatted key. ++ * @c: UBIFS file-system description object ++ * @k: key to fetch inode number from ++ */ ++static inline ino_t key_inum_flash(const struct ubifs_info *c, const void *k) ++{ ++ const union ubifs_key *key = k; ++ ++ return le32_to_cpu(key->j32[0]); ++} ++ ++/** ++ * key_hash - get directory entry hash. ++ * @c: UBIFS file-system description object ++ * @key: the key to get hash from ++ */ ++static inline uint32_t key_hash(const struct ubifs_info *c, ++ const union ubifs_key *key) ++{ ++ return key->u32[1] & UBIFS_S_KEY_HASH_MASK; ++} ++ ++/** ++ * key_hash_flash - get directory entry hash from an on-flash formatted key. ++ * @c: UBIFS file-system description object ++ * @k: the key to get hash from ++ */ ++static inline uint32_t key_hash_flash(const struct ubifs_info *c, const void *k) ++{ ++ const union ubifs_key *key = k; ++ ++ return le32_to_cpu(key->j32[1]) & UBIFS_S_KEY_HASH_MASK; ++} ++ ++/** ++ * key_block - get data block number. ++ * @c: UBIFS file-system description object ++ * @key: the key to get the block number from ++ */ ++static inline unsigned int key_block(const struct ubifs_info *c, ++ const union ubifs_key *key) ++{ ++ return key->u32[1] & UBIFS_S_KEY_BLOCK_MASK; ++} ++ ++/** ++ * key_block_flash - get data block number from an on-flash formatted key. 
++ * @c: UBIFS file-system description object ++ * @k: the key to get the block number from ++ */ ++static inline unsigned int key_block_flash(const struct ubifs_info *c, ++ const void *k) ++{ ++ const union ubifs_key *key = k; ++ ++ return le32_to_cpu(key->j32[1]) & UBIFS_S_KEY_BLOCK_MASK; ++} ++ ++/** ++ * key_read - transform a key to in-memory format. ++ * @c: UBIFS file-system description object ++ * @from: the key to transform ++ * @to: the key to store the result ++ */ ++static inline void key_read(const struct ubifs_info *c, const void *from, ++ union ubifs_key *to) ++{ ++ const union ubifs_key *f = from; ++ ++ to->u32[0] = le32_to_cpu(f->j32[0]); ++ to->u32[1] = le32_to_cpu(f->j32[1]); ++} ++ ++/** ++ * key_write - transform a key from in-memory format. ++ * @c: UBIFS file-system description object ++ * @from: the key to transform ++ * @to: the key to store the result ++ */ ++static inline void key_write(const struct ubifs_info *c, ++ const union ubifs_key *from, void *to) ++{ ++ union ubifs_key *t = to; ++ ++ t->j32[0] = cpu_to_le32(from->u32[0]); ++ t->j32[1] = cpu_to_le32(from->u32[1]); ++ memset(to + 8, 0, UBIFS_MAX_KEY_LEN - 8); ++} ++ ++/** ++ * key_write_idx - transform a key from in-memory format for the index. ++ * @c: UBIFS file-system description object ++ * @from: the key to transform ++ * @to: the key to store the result ++ */ ++static inline void key_write_idx(const struct ubifs_info *c, ++ const union ubifs_key *from, void *to) ++{ ++ union ubifs_key *t = to; ++ ++ t->j32[0] = cpu_to_le32(from->u32[0]); ++ t->j32[1] = cpu_to_le32(from->u32[1]); ++} ++ ++/** ++ * key_copy - copy a key. ++ * @c: UBIFS file-system description object ++ * @from: the key to copy from ++ * @to: the key to copy to ++ */ ++static inline void key_copy(const struct ubifs_info *c, ++ const union ubifs_key *from, union ubifs_key *to) ++{ ++ to->u64[0] = from->u64[0]; ++} ++ ++/** ++ * keys_cmp - compare keys. 
++ * @c: UBIFS file-system description object ++ * @key1: the first key to compare ++ * @key2: the second key to compare ++ * ++ * This function compares 2 keys and returns %-1 if @key1 is less than ++ * @key2, %0 if the keys are equivalent and %1 if @key1 is greater than @key2. ++ */ ++static inline int keys_cmp(const struct ubifs_info *c, ++ const union ubifs_key *key1, ++ const union ubifs_key *key2) ++{ ++ if (key1->u32[0] < key2->u32[0]) ++ return -1; ++ if (key1->u32[0] > key2->u32[0]) ++ return 1; ++ if (key1->u32[1] < key2->u32[1]) ++ return -1; ++ if (key1->u32[1] > key2->u32[1]) ++ return 1; ++ ++ return 0; ++} ++ ++/** ++ * keys_eq - determine if keys are equivalent. ++ * @c: UBIFS file-system description object ++ * @key1: the first key to compare ++ * @key2: the second key to compare ++ * ++ * This function compares 2 keys and returns %1 if @key1 is equal to @key2 and ++ * %0 if not. ++ */ ++static inline int keys_eq(const struct ubifs_info *c, ++ const union ubifs_key *key1, ++ const union ubifs_key *key2) ++{ ++ if (key1->u32[0] != key2->u32[0]) ++ return 0; ++ if (key1->u32[1] != key2->u32[1]) ++ return 0; ++ return 1; ++} ++ ++/** ++ * is_hash_key - is a key vulnerable to hash collisions. ++ * @c: UBIFS file-system description object ++ * @key: key ++ * ++ * This function returns %1 if @key is a hashed key or %0 otherwise. ++ */ ++static inline int is_hash_key(const struct ubifs_info *c, ++ const union ubifs_key *key) ++{ ++ int type = key_type(c, key); ++ ++ return type == UBIFS_DENT_KEY || type == UBIFS_XENT_KEY; ++} ++ ++/** ++ * key_max_inode_size - get maximum file size allowed by current key format. 
++ * @c: UBIFS file-system description object ++ */ ++static inline unsigned long long key_max_inode_size(const struct ubifs_info *c) ++{ ++ switch (c->key_fmt) { ++ case UBIFS_SIMPLE_KEY_FMT: ++ return (1ULL << UBIFS_S_KEY_BLOCK_BITS) * UBIFS_BLOCK_SIZE; ++ default: ++ return 0; ++ } ++} ++#endif /* !__UBIFS_KEY_H__ */ +diff -Nurd linux-2.6.24/fs/ubifs/log.c ubifs-v2.6.24/fs/ubifs/log.c +--- linux-2.6.24/fs/ubifs/log.c 1970-01-01 02:00:00.000000000 +0200 ++++ ubifs-v2.6.24/fs/ubifs/log.c 2009-04-07 17:14:47.000000000 +0200 +@@ -0,0 +1,806 @@ ++/* ++ * This file is part of UBIFS. ++ * ++ * Copyright (C) 2006-2008 Nokia Corporation. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published by ++ * the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, write to the Free Software Foundation, Inc., 51 ++ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ * ++ * Authors: Artem Bityutskiy (Битюцкий Артём) ++ * Adrian Hunter ++ */ ++ ++/* ++ * This file is a part of UBIFS journal implementation and contains various ++ * functions which manipulate the log. The log is a fixed area on the flash ++ * which does not contain any data but refers to buds. The log is a part of the ++ * journal. ++ */ ++ ++#include "ubifs.h" ++ ++#ifdef CONFIG_UBIFS_FS_DEBUG ++static int dbg_check_bud_bytes(struct ubifs_info *c); ++#else ++#define dbg_check_bud_bytes(c) 0 ++#endif ++ ++/** ++ * ubifs_search_bud - search bud LEB. 
++ * @c: UBIFS file-system description object ++ * @lnum: logical eraseblock number to search ++ * ++ * This function searches bud LEB @lnum. Returns bud description object in case ++ * of success and %NULL if there is no bud with this LEB number. ++ */ ++struct ubifs_bud *ubifs_search_bud(struct ubifs_info *c, int lnum) ++{ ++ struct rb_node *p; ++ struct ubifs_bud *bud; ++ ++ spin_lock(&c->buds_lock); ++ p = c->buds.rb_node; ++ while (p) { ++ bud = rb_entry(p, struct ubifs_bud, rb); ++ if (lnum < bud->lnum) ++ p = p->rb_left; ++ else if (lnum > bud->lnum) ++ p = p->rb_right; ++ else { ++ spin_unlock(&c->buds_lock); ++ return bud; ++ } ++ } ++ spin_unlock(&c->buds_lock); ++ return NULL; ++} ++ ++/** ++ * ubifs_get_wbuf - get the wbuf associated with a LEB, if there is one. ++ * @c: UBIFS file-system description object ++ * @lnum: logical eraseblock number to search ++ * ++ * This functions returns the wbuf for @lnum or %NULL if there is not one. ++ */ ++struct ubifs_wbuf *ubifs_get_wbuf(struct ubifs_info *c, int lnum) ++{ ++ struct rb_node *p; ++ struct ubifs_bud *bud; ++ int jhead; ++ ++ if (!c->jheads) ++ return NULL; ++ ++ spin_lock(&c->buds_lock); ++ p = c->buds.rb_node; ++ while (p) { ++ bud = rb_entry(p, struct ubifs_bud, rb); ++ if (lnum < bud->lnum) ++ p = p->rb_left; ++ else if (lnum > bud->lnum) ++ p = p->rb_right; ++ else { ++ jhead = bud->jhead; ++ spin_unlock(&c->buds_lock); ++ return &c->jheads[jhead].wbuf; ++ } ++ } ++ spin_unlock(&c->buds_lock); ++ return NULL; ++} ++ ++/** ++ * next_log_lnum - switch to the next log LEB. ++ * @c: UBIFS file-system description object ++ * @lnum: current log LEB ++ */ ++static inline int next_log_lnum(const struct ubifs_info *c, int lnum) ++{ ++ lnum += 1; ++ if (lnum > c->log_last) ++ lnum = UBIFS_LOG_LNUM; ++ ++ return lnum; ++} ++ ++/** ++ * empty_log_bytes - calculate amount of empty space in the log. 
++ * @c: UBIFS file-system description object ++ */ ++static inline long long empty_log_bytes(const struct ubifs_info *c) ++{ ++ long long h, t; ++ ++ h = (long long)c->lhead_lnum * c->leb_size + c->lhead_offs; ++ t = (long long)c->ltail_lnum * c->leb_size; ++ ++ if (h >= t) ++ return c->log_bytes - h + t; ++ else ++ return t - h; ++} ++ ++/** ++ * ubifs_add_bud - add bud LEB to the tree of buds and its journal head list. ++ * @c: UBIFS file-system description object ++ * @bud: the bud to add ++ */ ++void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud) ++{ ++ struct rb_node **p, *parent = NULL; ++ struct ubifs_bud *b; ++ struct ubifs_jhead *jhead; ++ ++ spin_lock(&c->buds_lock); ++ p = &c->buds.rb_node; ++ while (*p) { ++ parent = *p; ++ b = rb_entry(parent, struct ubifs_bud, rb); ++ ubifs_assert(bud->lnum != b->lnum); ++ if (bud->lnum < b->lnum) ++ p = &(*p)->rb_left; ++ else ++ p = &(*p)->rb_right; ++ } ++ ++ rb_link_node(&bud->rb, parent, p); ++ rb_insert_color(&bud->rb, &c->buds); ++ if (c->jheads) { ++ jhead = &c->jheads[bud->jhead]; ++ list_add_tail(&bud->list, &jhead->buds_list); ++ } else ++ ubifs_assert(c->replaying && (c->vfs_sb->s_flags & MS_RDONLY)); ++ ++ /* ++ * Note, although this is a new bud, we anyway account this space now, ++ * before any data has been written to it, because this is about to ++ * guarantee fixed mount time, and this bud will anyway be read and ++ * scanned. ++ */ ++ c->bud_bytes += c->leb_size - bud->start; ++ ++ dbg_log("LEB %d:%d, jhead %d, bud_bytes %lld", bud->lnum, ++ bud->start, bud->jhead, c->bud_bytes); ++ spin_unlock(&c->buds_lock); ++} ++ ++/** ++ * ubifs_create_buds_lists - create journal head buds lists for remount rw. 
++ * @c: UBIFS file-system description object
++ */
++void ubifs_create_buds_lists(struct ubifs_info *c)
++{
++	struct rb_node *p;
++
++	spin_lock(&c->buds_lock);
++	p = rb_first(&c->buds);
++	while (p) {
++		struct ubifs_bud *bud = rb_entry(p, struct ubifs_bud, rb);
++		struct ubifs_jhead *jhead = &c->jheads[bud->jhead];
++
++		list_add_tail(&bud->list, &jhead->buds_list);
++		p = rb_next(p);
++	}
++	spin_unlock(&c->buds_lock);
++}
++
++/**
++ * ubifs_add_bud_to_log - add a new bud to the log.
++ * @c: UBIFS file-system description object
++ * @jhead: journal head the bud belongs to
++ * @lnum: LEB number of the bud
++ * @offs: starting offset of the bud
++ *
++ * This function writes a reference node for the new bud LEB @lnum to the log,
++ * and adds the bud to the buds tree. It also makes sure that log size does not
++ * exceed the 'c->max_bud_bytes' limit. Returns zero in case of success,
++ * %-EAGAIN if commit is required, and a negative error code in case of
++ * failure.
++ */
++int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs)
++{
++	int err;
++	struct ubifs_bud *bud;
++	struct ubifs_ref_node *ref;
++
++	bud = kmalloc(sizeof(struct ubifs_bud), GFP_NOFS);
++	if (!bud)
++		return -ENOMEM;
++	ref = kzalloc(c->ref_node_alsz, GFP_NOFS);
++	if (!ref) {
++		kfree(bud);
++		return -ENOMEM;
++	}
++
++	mutex_lock(&c->log_mutex);
++
++	if (c->ro_media) {
++		err = -EROFS;
++		goto out_unlock;
++	}
++
++	/* Make sure we have enough space in the log */
++	if (empty_log_bytes(c) - c->ref_node_alsz < c->min_log_bytes) {
++		dbg_log("not enough log space - %lld, required %d",
++			empty_log_bytes(c), c->min_log_bytes);
++		ubifs_commit_required(c);
++		err = -EAGAIN;
++		goto out_unlock;
++	}
++
++	/*
++	 * Make sure the amount of space in buds will not exceed the
++	 * 'c->max_bud_bytes' limit, because we want to guarantee mount time
++	 * limits.
++	 *
++	 * It is not necessary to hold @c->buds_lock when reading @c->bud_bytes
++	 * because we are holding @c->log_mutex. All @c->bud_bytes updates take
++	 * place when both @c->log_mutex and @c->buds_lock are held.
++	 */
++	if (c->bud_bytes + c->leb_size - offs > c->max_bud_bytes) {
++		dbg_log("bud bytes %lld (%lld max), require commit",
++			c->bud_bytes, c->max_bud_bytes);
++		ubifs_commit_required(c);
++		err = -EAGAIN;
++		goto out_unlock;
++	}
++
++	/*
++	 * If the journal is full enough - start background commit. Note, it is
++	 * OK to read 'c->cmt_state' without spinlock because integer reads
++	 * are atomic in the kernel.
++	 */
++	if (c->bud_bytes >= c->bg_bud_bytes &&
++	    c->cmt_state == COMMIT_RESTING) {
++		dbg_log("bud bytes %lld (%lld max), initiate BG commit",
++			c->bud_bytes, c->max_bud_bytes);
++		ubifs_request_bg_commit(c);
++	}
++
++	bud->lnum = lnum;
++	bud->start = offs;
++	bud->jhead = jhead;
++
++	ref->ch.node_type = UBIFS_REF_NODE;
++	ref->lnum = cpu_to_le32(bud->lnum);
++	ref->offs = cpu_to_le32(bud->start);
++	ref->jhead = cpu_to_le32(jhead);
++
++	if (c->lhead_offs > c->leb_size - c->ref_node_alsz) {
++		c->lhead_lnum = next_log_lnum(c, c->lhead_lnum);
++		c->lhead_offs = 0;
++	}
++
++	if (c->lhead_offs == 0) {
++		/* Must ensure next log LEB has been unmapped */
++		err = ubifs_leb_unmap(c, c->lhead_lnum);
++		if (err)
++			goto out_unlock;
++	}
++
++	if (bud->start == 0) {
++		/*
++		 * Before writing to the log a reference to an empty LEB, we
++		 * have to make sure that LEB is mapped, because otherwise
++		 * we'd risk referring to an LEB with garbage in case of
++		 * an unclean reboot: the target LEB might have been
++		 * unmapped, but not yet physically erased.
++		 */
++		err = ubi_leb_map(c->ubi, bud->lnum, UBI_SHORTTERM);
++		if (err)
++			goto out_unlock;
++	}
++
++	dbg_log("write ref LEB %d:%d",
++		c->lhead_lnum, c->lhead_offs);
++	err = ubifs_write_node(c, ref, UBIFS_REF_NODE_SZ, c->lhead_lnum,
++			       c->lhead_offs, UBI_SHORTTERM);
++	if (err)
++		goto out_unlock;
++
++	c->lhead_offs += c->ref_node_alsz;
++
++	ubifs_add_bud(c, bud);
++
++	mutex_unlock(&c->log_mutex);
++	kfree(ref);
++	return 0;
++
++out_unlock:
++	if (err != -EAGAIN)
++		ubifs_ro_mode(c, err);
++	mutex_unlock(&c->log_mutex);
++	kfree(ref);
++	kfree(bud);
++	return err;
++}
++
++/**
++ * remove_buds - remove used buds.
++ * @c: UBIFS file-system description object
++ *
++ * This function removes used buds from the buds tree. It does not remove the
++ * buds which are pointed to by journal heads.
++ */
++static void remove_buds(struct ubifs_info *c)
++{
++	struct rb_node *p;
++
++	ubifs_assert(list_empty(&c->old_buds));
++	c->cmt_bud_bytes = 0;
++	spin_lock(&c->buds_lock);
++	p = rb_first(&c->buds);
++	while (p) {
++		struct rb_node *p1 = p;
++		struct ubifs_bud *bud;
++		struct ubifs_wbuf *wbuf;
++
++		p = rb_next(p);
++		bud = rb_entry(p1, struct ubifs_bud, rb);
++		wbuf = &c->jheads[bud->jhead].wbuf;
++
++		if (wbuf->lnum == bud->lnum) {
++			/*
++			 * Do not remove buds which are pointed to by journal
++			 * heads (non-closed buds).
++			 */
++			c->cmt_bud_bytes += wbuf->offs - bud->start;
++			dbg_log("preserve %d:%d, jhead %d, bud bytes %d, "
++				"cmt_bud_bytes %lld", bud->lnum, bud->start,
++				bud->jhead, wbuf->offs - bud->start,
++				c->cmt_bud_bytes);
++			bud->start = wbuf->offs;
++		} else {
++			c->cmt_bud_bytes += c->leb_size - bud->start;
++			dbg_log("remove %d:%d, jhead %d, bud bytes %d, "
++				"cmt_bud_bytes %lld", bud->lnum, bud->start,
++				bud->jhead, c->leb_size - bud->start,
++				c->cmt_bud_bytes);
++			rb_erase(p1, &c->buds);
++			/*
++			 * If the commit does not finish, the recovery will need
++			 * to replay the journal, in which case the old buds
++			 * must be unchanged. Do not release them until post
++			 * commit, i.e. do not allow them to be garbage
++			 * collected.
++			 */
++			list_move(&bud->list, &c->old_buds);
++		}
++	}
++	spin_unlock(&c->buds_lock);
++}
++
++/**
++ * ubifs_log_start_commit - start commit.
++ * @c: UBIFS file-system description object
++ * @ltail_lnum: return new log tail LEB number
++ *
++ * The commit operation starts with writing a "commit start" node to the log
++ * and reference nodes for all journal heads which will define new journal after
++ * the commit has been finished. The commit start and reference nodes are
++ * written in one go to the nearest empty log LEB (hence, when commit is
++ * finished UBIFS may safely unmap all the previous log LEBs). This function
++ * returns zero in case of success and a negative error code in case of
++ * failure.
++ */
++int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum)
++{
++	void *buf;
++	struct ubifs_cs_node *cs;
++	struct ubifs_ref_node *ref;
++	int err, i, max_len, len;
++
++	err = dbg_check_bud_bytes(c);
++	if (err)
++		return err;
++
++	max_len = UBIFS_CS_NODE_SZ + c->jhead_cnt * UBIFS_REF_NODE_SZ;
++	max_len = ALIGN(max_len, c->min_io_size);
++	buf = cs = kmalloc(max_len, GFP_NOFS);
++	if (!buf)
++		return -ENOMEM;
++
++	cs->ch.node_type = UBIFS_CS_NODE;
++	cs->cmt_no = cpu_to_le64(c->cmt_no);
++	ubifs_prepare_node(c, cs, UBIFS_CS_NODE_SZ, 0);
++
++	/*
++	 * Note, we do not lock 'c->log_mutex' because this is the commit start
++	 * phase and we are exclusively using the log. And we do not lock
++	 * write-buffer because nobody can write to the file-system at this
++	 * phase.
++	 */
++
++	len = UBIFS_CS_NODE_SZ;
++	for (i = 0; i < c->jhead_cnt; i++) {
++		int lnum = c->jheads[i].wbuf.lnum;
++		int offs = c->jheads[i].wbuf.offs;
++
++		if (lnum == -1 || offs == c->leb_size)
++			continue;
++
++		dbg_log("add ref to LEB %d:%d for jhead %d", lnum, offs, i);
++		ref = buf + len;
++		ref->ch.node_type = UBIFS_REF_NODE;
++		ref->lnum = cpu_to_le32(lnum);
++		ref->offs = cpu_to_le32(offs);
++		ref->jhead = cpu_to_le32(i);
++
++		ubifs_prepare_node(c, ref, UBIFS_REF_NODE_SZ, 0);
++		len += UBIFS_REF_NODE_SZ;
++	}
++
++	ubifs_pad(c, buf + len, ALIGN(len, c->min_io_size) - len);
++
++	/* Switch to the next log LEB */
++	if (c->lhead_offs) {
++		c->lhead_lnum = next_log_lnum(c, c->lhead_lnum);
++		c->lhead_offs = 0;
++	}
++
++	if (c->lhead_offs == 0) {
++		/* Must ensure next LEB has been unmapped */
++		err = ubifs_leb_unmap(c, c->lhead_lnum);
++		if (err)
++			goto out;
++	}
++
++	len = ALIGN(len, c->min_io_size);
++	dbg_log("writing commit start at LEB %d:0, len %d", c->lhead_lnum, len);
++	err = ubifs_leb_write(c, c->lhead_lnum, cs, 0, len, UBI_SHORTTERM);
++	if (err)
++		goto out;
++
++	*ltail_lnum = c->lhead_lnum;
++
++	c->lhead_offs += len;
++	if (c->lhead_offs == c->leb_size) {
++		c->lhead_lnum = next_log_lnum(c, c->lhead_lnum);
++		c->lhead_offs = 0;
++	}
++
++	remove_buds(c);
++
++	/*
++	 * We have started the commit and now users may use the rest of the log
++	 * for new writes.
++	 */
++	c->min_log_bytes = 0;
++
++out:
++	kfree(buf);
++	return err;
++}
++
++/**
++ * ubifs_log_end_commit - end commit.
++ * @c: UBIFS file-system description object
++ * @ltail_lnum: new log tail LEB number
++ *
++ * This function is called on when the commit operation was finished. It
++ * moves log tail to new position and unmaps LEBs which contain obsolete data.
++ * Returns zero in case of success and a negative error code in case of
++ * failure.
++ */
++int ubifs_log_end_commit(struct ubifs_info *c, int ltail_lnum)
++{
++	int err;
++
++	/*
++	 * At this phase we have to lock 'c->log_mutex' because UBIFS allows FS
++	 * writes during commit. It is only the short "commit start" phase when
++	 * writers are blocked.
++	 */
++	mutex_lock(&c->log_mutex);
++
++	dbg_log("old tail was LEB %d:0, new tail is LEB %d:0",
++		c->ltail_lnum, ltail_lnum);
++
++	c->ltail_lnum = ltail_lnum;
++	/*
++	 * The commit is finished and from now on it must be guaranteed that
++	 * there is always enough space for the next commit.
++	 */
++	c->min_log_bytes = c->leb_size;
++
++	spin_lock(&c->buds_lock);
++	c->bud_bytes -= c->cmt_bud_bytes;
++	spin_unlock(&c->buds_lock);
++
++	err = dbg_check_bud_bytes(c);
++
++	mutex_unlock(&c->log_mutex);
++	return err;
++}
++
++/**
++ * ubifs_log_post_commit - things to do after commit is completed.
++ * @c: UBIFS file-system description object
++ * @old_ltail_lnum: old log tail LEB number
++ *
++ * Release buds only after commit is completed, because they must be unchanged
++ * if recovery is needed.
++ *
++ * Unmap log LEBs only after commit is completed, because they may be needed for
++ * recovery.
++ *
++ * This function returns %0 on success and a negative error code on failure.
++ */
++int ubifs_log_post_commit(struct ubifs_info *c, int old_ltail_lnum)
++{
++	int lnum, err = 0;
++
++	while (!list_empty(&c->old_buds)) {
++		struct ubifs_bud *bud;
++
++		bud = list_entry(c->old_buds.next, struct ubifs_bud, list);
++		err = ubifs_return_leb(c, bud->lnum);
++		if (err)
++			return err;
++		list_del(&bud->list);
++		kfree(bud);
++	}
++	mutex_lock(&c->log_mutex);
++	for (lnum = old_ltail_lnum; lnum != c->ltail_lnum;
++	     lnum = next_log_lnum(c, lnum)) {
++		dbg_log("unmap log LEB %d", lnum);
++		err = ubifs_leb_unmap(c, lnum);
++		if (err)
++			goto out;
++	}
++out:
++	mutex_unlock(&c->log_mutex);
++	return err;
++}
++
++/**
++ * struct done_ref - references that have been done.
++ * @rb: rb-tree node
++ * @lnum: LEB number
++ */
++struct done_ref {
++	struct rb_node rb;
++	int lnum;
++};
++
++/**
++ * done_already - determine if a reference has been done already.
++ * @done_tree: rb-tree to store references that have been done
++ * @lnum: LEB number of reference
++ *
++ * Returns %1 if the reference has been done, %0 if not (in which case it is
++ * recorded in @done_tree), and a negative error code on allocation failure.
++ */
++static int done_already(struct rb_root *done_tree, int lnum)
++{
++	struct rb_node **p = &done_tree->rb_node, *parent = NULL;
++	struct done_ref *dr;
++
++	while (*p) {
++		parent = *p;
++		dr = rb_entry(parent, struct done_ref, rb);
++		if (lnum < dr->lnum)
++			p = &(*p)->rb_left;
++		else if (lnum > dr->lnum)
++			p = &(*p)->rb_right;
++		else
++			return 1;
++	}
++
++	dr = kzalloc(sizeof(struct done_ref), GFP_NOFS);
++	if (!dr)
++		return -ENOMEM;
++
++	dr->lnum = lnum;
++
++	rb_link_node(&dr->rb, parent, p);
++	rb_insert_color(&dr->rb, done_tree);
++
++	return 0;
++}
++
++/**
++ * destroy_done_tree - free every node of the done tree, leaves first.
++ * @done_tree: done tree to destroy
++ */
++static void destroy_done_tree(struct rb_root *done_tree)
++{
++	struct rb_node *this = done_tree->rb_node;
++	struct done_ref *dr;
++
++	while (this) {
++		if (this->rb_left) {
++			this = this->rb_left;
++			continue;
++		} else if (this->rb_right) {
++			this = this->rb_right;
++			continue;
++		}
++		dr = rb_entry(this, struct done_ref, rb);
++		this = rb_parent(this);
++		if (this) {
++			if (this->rb_left == &dr->rb)
++				this->rb_left = NULL;
++			else
++				this->rb_right = NULL;
++		}
++		kfree(dr);
++	}
++}
++
++/**
++ * add_node - add a node to the consolidated log.
++ * @c: UBIFS file-system description object
++ * @buf: buffer to which to add
++ * @lnum: LEB number to which to write is passed and returned here
++ * @offs: offset to where to write is passed and returned here
++ * @node: node to add
++ *
++ * This function returns %0 on success and a negative error code on failure.
++ */
++static int add_node(struct ubifs_info *c, void *buf, int *lnum, int *offs,
++		    void *node)
++{
++	struct ubifs_ch *ch = node;
++	int len = le32_to_cpu(ch->len), remains = c->leb_size - *offs;
++
++	if (len > remains) {
++		int sz = ALIGN(*offs, c->min_io_size), err;
++
++		ubifs_pad(c, buf + *offs, sz - *offs);
++		err = ubifs_leb_change(c, *lnum, buf, sz, UBI_SHORTTERM);
++		if (err)
++			return err;
++		*lnum = next_log_lnum(c, *lnum);
++		*offs = 0;
++	}
++	memcpy(buf + *offs, node, len);
++	*offs += ALIGN(len, 8);
++	return 0;
++}
++
++/**
++ * ubifs_consolidate_log - consolidate the log.
++ * @c: UBIFS file-system description object
++ *
++ * Repeated failed commits could cause the log to be full, but at least 1 LEB is
++ * needed for commit. This function rewrites the reference nodes in the log,
++ * omitting duplicates and all but the first CS node, and leaving no gaps.
++ *
++ * This function returns %0 on success and a negative error code on failure.
++ */
++int ubifs_consolidate_log(struct ubifs_info *c)
++{
++	struct ubifs_scan_leb *sleb;
++	struct ubifs_scan_node *snod;
++	struct rb_root done_tree = RB_ROOT;
++	int lnum, err, first = 1, write_lnum, offs = 0;
++	void *buf;
++
++	dbg_rcvry("log tail LEB %d, log head LEB %d", c->ltail_lnum,
++		  c->lhead_lnum);
++	buf = vmalloc(c->leb_size);
++	if (!buf)
++		return -ENOMEM;
++	lnum = c->ltail_lnum;
++	write_lnum = lnum;
++	while (1) {
++		sleb = ubifs_scan(c, lnum, 0, c->sbuf);
++		if (IS_ERR(sleb)) {
++			err = PTR_ERR(sleb);
++			goto out_free;
++		}
++		list_for_each_entry(snod, &sleb->nodes, list) {
++			switch (snod->type) {
++			case UBIFS_REF_NODE: {
++				struct ubifs_ref_node *ref = snod->node;
++				int ref_lnum = le32_to_cpu(ref->lnum);
++
++				err = done_already(&done_tree, ref_lnum);
++				if (err < 0)
++					goto out_scan;
++				if (err != 1) {
++					err = add_node(c, buf, &write_lnum,
++						       &offs, snod->node);
++					if (err)
++						goto out_scan;
++				}
++				break;
++			}
++			case UBIFS_CS_NODE:
++				if (!first)
++					break;
++				err = add_node(c, buf, &write_lnum, &offs,
++					       snod->node);
++				if (err)
++					goto out_scan;
++				first = 0;
++				break;
++			}
++		}
++		ubifs_scan_destroy(sleb);
++		if (lnum == c->lhead_lnum)
++			break;
++		lnum = next_log_lnum(c, lnum);
++	}
++	if (offs) {
++		int sz = ALIGN(offs, c->min_io_size);
++
++		ubifs_pad(c, buf + offs, sz - offs);
++		err = ubifs_leb_change(c, write_lnum, buf, sz, UBI_SHORTTERM);
++		if (err)
++			goto out_free;
++		offs = ALIGN(offs, c->min_io_size);
++	}
++	destroy_done_tree(&done_tree);
++	vfree(buf);
++	if (write_lnum == c->lhead_lnum) {
++		ubifs_err("log is too full");
++		return -EINVAL;
++	}
++	/* Unmap the LEBs after the new head, up to and including the old head */
++	lnum = write_lnum;
++	do {
++		lnum = next_log_lnum(c, lnum);
++		err = ubifs_leb_unmap(c, lnum);
++		if (err)
++			return err;
++	} while (lnum != c->lhead_lnum);
++	c->lhead_lnum = write_lnum;
++	c->lhead_offs = offs;
++	dbg_rcvry("new log head at %d:%d", c->lhead_lnum, c->lhead_offs);
++	return 0;
++
++out_scan:
++	ubifs_scan_destroy(sleb);
++out_free:
++	destroy_done_tree(&done_tree);
++	vfree(buf);
++	return err;
++}
++
++#ifdef CONFIG_UBIFS_FS_DEBUG
++
++/**
++ * dbg_check_bud_bytes - make sure bud bytes calculation are all right.
++ * @c: UBIFS file-system description object
++ *
++ * This function makes sure the amount of flash space used by closed buds
++ * ('c->bud_bytes' is correct). Returns zero in case of success and %-EINVAL in
++ * case of failure.
++ */
++static int dbg_check_bud_bytes(struct ubifs_info *c)
++{
++	int i, err = 0;
++	struct ubifs_bud *bud;
++	long long bud_bytes = 0;
++
++	if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
++		return 0;
++
++	spin_lock(&c->buds_lock);
++	for (i = 0; i < c->jhead_cnt; i++)
++		list_for_each_entry(bud, &c->jheads[i].buds_list, list)
++			bud_bytes += c->leb_size - bud->start;
++
++	if (c->bud_bytes != bud_bytes) {
++		ubifs_err("bad bud_bytes %lld, calculated %lld",
++			  c->bud_bytes, bud_bytes);
++		err = -EINVAL;
++	}
++	spin_unlock(&c->buds_lock);
++
++	return err;
++}
++
++#endif /* CONFIG_UBIFS_FS_DEBUG */
+diff -Nurd linux-2.6.24/fs/ubifs/lprops.c ubifs-v2.6.24/fs/ubifs/lprops.c
+--- linux-2.6.24/fs/ubifs/lprops.c 1970-01-01 02:00:00.000000000 +0200
++++ ubifs-v2.6.24/fs/ubifs/lprops.c 2009-04-07 17:14:47.000000000 +0200
+@@ -0,0 +1,1333 @@
++/*
++ * This file is part of UBIFS.
++ *
++ * Copyright (C) 2006-2008 Nokia Corporation.
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 as published by
++ * the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
++ * more details.
++ *
++ * You should have received a copy of the GNU General Public License along with
++ * this program; if not, write to the Free Software Foundation, Inc., 51
++ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ *
++ * Authors: Adrian Hunter
++ *          Artem Bityutskiy (Битюцкий Артём)
++ */
++
++/*
++ * This file implements the functions that access LEB properties and their
++ * categories. LEBs are categorized based on the needs of UBIFS, and the
++ * categories are stored as either heaps or lists to provide a fast way of
++ * finding a LEB in a particular category. For example, UBIFS may need to find
++ * an empty LEB for the journal, or a very dirty LEB for garbage collection.
++ */
++
++#include "ubifs.h"
++
++/**
++ * get_heap_comp_val - get the LEB properties value for heap comparisons.
++ * @lprops: LEB properties
++ * @cat: LEB category
++ */
++static int get_heap_comp_val(struct ubifs_lprops *lprops, int cat)
++{
++	switch (cat) {
++	case LPROPS_FREE:
++		return lprops->free;
++	case LPROPS_DIRTY_IDX:
++		return lprops->free + lprops->dirty;
++	default:
++		return lprops->dirty;
++	}
++}
++
++/**
++ * move_up_lpt_heap - move a new heap entry up as far as possible.
++ * @c: UBIFS file-system description object
++ * @heap: LEB category heap
++ * @lprops: LEB properties to move
++ * @cat: LEB category
++ *
++ * New entries to a heap are added at the bottom and then moved up until the
++ * parent's value is not smaller. For LPT's category heaps, the value is the
++ * amount of free space, dirty space, or free plus dirty space, depending
++ * on the category.
++ */
++static void move_up_lpt_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap,
++			     struct ubifs_lprops *lprops, int cat)
++{
++	int val1, val2, hpos;
++
++	hpos = lprops->hpos;
++	if (!hpos)
++		return; /* Already top of the heap */
++	val1 = get_heap_comp_val(lprops, cat);
++	/* Compare to parent and, if greater, move up the heap */
++	do {
++		int ppos = (hpos - 1) / 2;
++
++		val2 = get_heap_comp_val(heap->arr[ppos], cat);
++		if (val2 >= val1)
++			return;
++		/* Greater than parent so move up */
++		heap->arr[ppos]->hpos = hpos;
++		heap->arr[hpos] = heap->arr[ppos];
++		heap->arr[ppos] = lprops;
++		lprops->hpos = ppos;
++		hpos = ppos;
++	} while (hpos);
++}
++
++/**
++ * adjust_lpt_heap - move a changed heap entry up or down the heap.
++ * @c: UBIFS file-system description object
++ * @heap: LEB category heap
++ * @lprops: LEB properties to move
++ * @hpos: heap position of @lprops
++ * @cat: LEB category
++ *
++ * Changed entries in a heap are moved up or down until the parent's value is
++ * greater. In the case of LPT's category heaps, the value is either the amount
++ * of free space or the amount of dirty space, depending on the category.
++ */
++static void adjust_lpt_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap,
++			    struct ubifs_lprops *lprops, int hpos, int cat)
++{
++	int val1, val2, val3, cpos;
++
++	val1 = get_heap_comp_val(lprops, cat);
++	/* Compare to parent and, if greater than parent, move up the heap */
++	if (hpos) {
++		int ppos = (hpos - 1) / 2;
++
++		val2 = get_heap_comp_val(heap->arr[ppos], cat);
++		if (val1 > val2) {
++			/* Greater than parent so move up */
++			while (1) {
++				heap->arr[ppos]->hpos = hpos;
++				heap->arr[hpos] = heap->arr[ppos];
++				heap->arr[ppos] = lprops;
++				lprops->hpos = ppos;
++				hpos = ppos;
++				if (!hpos)
++					return;
++				ppos = (hpos - 1) / 2;
++				val2 = get_heap_comp_val(heap->arr[ppos], cat);
++				if (val1 <= val2)
++					return;
++				/* Still greater than parent so keep going */
++			}
++		}
++	}
++
++	/* Not greater than parent, so compare to children */
++	while (1) {
++		/* Compare to left child */
++		cpos = hpos * 2 + 1;
++		if (cpos >= heap->cnt)
++			return;
++		val2 = get_heap_comp_val(heap->arr[cpos], cat);
++		if (val1 < val2) {
++			/* Less than left child, so promote biggest child */
++			if (cpos + 1 < heap->cnt) {
++				val3 = get_heap_comp_val(heap->arr[cpos + 1],
++							 cat);
++				if (val3 > val2)
++					cpos += 1; /* Right child is bigger */
++			}
++			heap->arr[cpos]->hpos = hpos;
++			heap->arr[hpos] = heap->arr[cpos];
++			heap->arr[cpos] = lprops;
++			lprops->hpos = cpos;
++			hpos = cpos;
++			continue;
++		}
++		/* Compare to right child */
++		cpos += 1;
++		if (cpos >= heap->cnt)
++			return;
++		val3 = get_heap_comp_val(heap->arr[cpos], cat);
++		if (val1 < val3) {
++			/* Less than right child, so promote right child */
++			heap->arr[cpos]->hpos = hpos;
++			heap->arr[hpos] = heap->arr[cpos];
++			heap->arr[cpos] = lprops;
++			lprops->hpos = cpos;
++			hpos = cpos;
++			continue;
++		}
++		return;
++	}
++}
++
++/**
++ * add_to_lpt_heap - add LEB properties to a LEB category heap.
++ * @c: UBIFS file-system description object
++ * @lprops: LEB properties to add
++ * @cat: LEB category
++ *
++ * Returns %1 if @lprops is added to heap @cat (possibly displacing a smaller
++ * bottom entry to the uncategorized list), and %0 if it was not added.
++ */
++static int add_to_lpt_heap(struct ubifs_info *c, struct ubifs_lprops *lprops,
++			   int cat)
++{
++	struct ubifs_lpt_heap *heap = &c->lpt_heap[cat - 1];
++
++	if (heap->cnt >= heap->max_cnt) {
++		const int b = LPT_HEAP_SZ / 2 - 1;
++		int cpos, val1, val2;
++
++		/* Compare to some other LEB on the bottom of heap */
++		/* Derive a pseudo-random bottom slot from the pointer bits */
++		cpos = (((size_t)lprops >> 4) & b) + b;
++		ubifs_assert(cpos >= b);
++		ubifs_assert(cpos < LPT_HEAP_SZ);
++		ubifs_assert(cpos < heap->cnt);
++
++		val1 = get_heap_comp_val(lprops, cat);
++		val2 = get_heap_comp_val(heap->arr[cpos], cat);
++		if (val1 > val2) {
++			struct ubifs_lprops *lp;
++
++			lp = heap->arr[cpos];
++			lp->flags &= ~LPROPS_CAT_MASK;
++			lp->flags |= LPROPS_UNCAT;
++			list_add(&lp->list, &c->uncat_list);
++			lprops->hpos = cpos;
++			heap->arr[cpos] = lprops;
++			move_up_lpt_heap(c, heap, lprops, cat);
++			dbg_check_heap(c, heap, cat, lprops->hpos);
++			return 1; /* Added to heap */
++		}
++		dbg_check_heap(c, heap, cat, -1);
++		return 0; /* Not added to heap */
++	} else {
++		lprops->hpos = heap->cnt++;
++		heap->arr[lprops->hpos] = lprops;
++		move_up_lpt_heap(c, heap, lprops, cat);
++		dbg_check_heap(c, heap, cat, lprops->hpos);
++		return 1; /* Added to heap */
++	}
++}
++
++/**
++ * remove_from_lpt_heap - remove LEB properties from a LEB category heap.
++ * @c: UBIFS file-system description object
++ * @lprops: LEB properties to remove
++ * @cat: LEB category
++ */
++static void remove_from_lpt_heap(struct ubifs_info *c,
++				 struct ubifs_lprops *lprops, int cat)
++{
++	struct ubifs_lpt_heap *heap;
++	int hpos = lprops->hpos;
++
++	heap = &c->lpt_heap[cat - 1];
++	ubifs_assert(hpos >= 0 && hpos < heap->cnt);
++	ubifs_assert(heap->arr[hpos] == lprops);
++	heap->cnt -= 1;
++	if (hpos < heap->cnt) {
++		heap->arr[hpos] = heap->arr[heap->cnt];
++		heap->arr[hpos]->hpos = hpos;
++		adjust_lpt_heap(c, heap, heap->arr[hpos], hpos, cat);
++	}
++	dbg_check_heap(c, heap, cat, -1);
++}
++
++/**
++ * lpt_heap_replace - replace lprops in a category heap.
++ * @c: UBIFS file-system description object
++ * @old_lprops: LEB properties to replace (not read; slot is @new_lprops->hpos)
++ * @new_lprops: LEB properties with which to replace
++ * @cat: LEB category
++ *
++ * During commit it is sometimes necessary to copy a pnode (see dirty_cow_pnode)
++ * and the lprops that the pnode contains. When that happens, references in
++ * the category heaps to those lprops must be updated to point to the new
++ * lprops. This function does that.
++ */
++static void lpt_heap_replace(struct ubifs_info *c,
++			     struct ubifs_lprops *old_lprops,
++			     struct ubifs_lprops *new_lprops, int cat)
++{
++	struct ubifs_lpt_heap *heap;
++	int hpos = new_lprops->hpos;
++
++	heap = &c->lpt_heap[cat - 1];
++	heap->arr[hpos] = new_lprops;
++}
++
++/**
++ * ubifs_add_to_cat - add LEB properties to a category list or heap.
++ * @c: UBIFS file-system description object
++ * @lprops: LEB properties to add
++ * @cat: LEB category to which to add
++ *
++ * LEB properties are categorized to enable fast find operations.
++ */
++void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops,
++		      int cat)
++{
++	switch (cat) {
++	case LPROPS_DIRTY:
++	case LPROPS_DIRTY_IDX:
++	case LPROPS_FREE:
++		if (add_to_lpt_heap(c, lprops, cat))
++			break;
++		/* Not accepted by the heap, so make it uncategorized */
++		cat = LPROPS_UNCAT;
++		/* Fall through */
++	case LPROPS_UNCAT:
++		list_add(&lprops->list, &c->uncat_list);
++		break;
++	case LPROPS_EMPTY:
++		list_add(&lprops->list, &c->empty_list);
++		break;
++	case LPROPS_FREEABLE:
++		list_add(&lprops->list, &c->freeable_list);
++		c->freeable_cnt += 1;
++		break;
++	case LPROPS_FRDI_IDX:
++		list_add(&lprops->list, &c->frdi_idx_list);
++		break;
++	default:
++		ubifs_assert(0);
++	}
++	lprops->flags &= ~LPROPS_CAT_MASK;
++	lprops->flags |= cat;
++}
++
++/**
++ * ubifs_remove_from_cat - remove LEB properties from a category list or heap.
++ * @c: UBIFS file-system description object
++ * @lprops: LEB properties to remove
++ * @cat: LEB category from which to remove
++ *
++ * Unlinks @lprops only; the category bits in @lprops->flags are not changed.
++ */
++static void ubifs_remove_from_cat(struct ubifs_info *c,
++				  struct ubifs_lprops *lprops, int cat)
++{
++	switch (cat) {
++	case LPROPS_DIRTY:
++	case LPROPS_DIRTY_IDX:
++	case LPROPS_FREE:
++		remove_from_lpt_heap(c, lprops, cat);
++		break;
++	case LPROPS_FREEABLE:
++		c->freeable_cnt -= 1;
++		ubifs_assert(c->freeable_cnt >= 0);
++		/* Fall through */
++	case LPROPS_UNCAT:
++	case LPROPS_EMPTY:
++	case LPROPS_FRDI_IDX:
++		ubifs_assert(!list_empty(&lprops->list));
++		list_del(&lprops->list);
++		break;
++	default:
++		ubifs_assert(0);
++	}
++}
++
++/**
++ * ubifs_replace_cat - replace lprops in a category list or heap.
++ * @c: UBIFS file-system description object
++ * @old_lprops: LEB properties to replace
++ * @new_lprops: LEB properties with which to replace
++ *
++ * During commit it is sometimes necessary to copy a pnode (see dirty_cow_pnode)
++ * and the lprops that the pnode contains. When that happens, references in
++ * category lists and heaps must be replaced. This function does that.
++ */
++void ubifs_replace_cat(struct ubifs_info *c, struct ubifs_lprops *old_lprops,
++		       struct ubifs_lprops *new_lprops)
++{
++	int cat;
++
++	cat = new_lprops->flags & LPROPS_CAT_MASK;
++	switch (cat) {
++	case LPROPS_DIRTY:
++	case LPROPS_DIRTY_IDX:
++	case LPROPS_FREE:
++		lpt_heap_replace(c, old_lprops, new_lprops, cat);
++		break;
++	case LPROPS_UNCAT:
++	case LPROPS_EMPTY:
++	case LPROPS_FREEABLE:
++	case LPROPS_FRDI_IDX:
++		list_replace(&old_lprops->list, &new_lprops->list);
++		break;
++	default:
++		ubifs_assert(0);
++	}
++}
++
++/**
++ * ubifs_ensure_cat - ensure LEB properties are categorized.
++ * @c: UBIFS file-system description object
++ * @lprops: LEB properties
++ *
++ * A LEB may have fallen off of the bottom of a heap, and ended up as
++ * uncategorized even though it has enough space for us now. If that is the case
++ * this function re-files the LEB under the category it now qualifies for.
++ */
++void ubifs_ensure_cat(struct ubifs_info *c, struct ubifs_lprops *lprops)
++{
++	int cat = lprops->flags & LPROPS_CAT_MASK;
++
++	if (cat != LPROPS_UNCAT)
++		return;
++	cat = ubifs_categorize_lprops(c, lprops);
++	if (cat == LPROPS_UNCAT)
++		return;
++	ubifs_remove_from_cat(c, lprops, LPROPS_UNCAT);
++	ubifs_add_to_cat(c, lprops, cat);
++}
++
++/**
++ * ubifs_categorize_lprops - categorize LEB properties.
++ * @c: UBIFS file-system description object
++ * @lprops: LEB properties to categorize
++ *
++ * LEB properties are categorized to enable fast find operations. This function
++ * returns the LEB category to which the LEB properties belong. Note however
++ * that if the LEB category is stored as a heap and the heap is full, the
++ * LEB properties may have their category changed to %LPROPS_UNCAT.
++ */ ++int ubifs_categorize_lprops(const struct ubifs_info *c, ++ const struct ubifs_lprops *lprops) ++{ ++ if (lprops->flags & LPROPS_TAKEN) ++ return LPROPS_UNCAT; ++ ++ if (lprops->free == c->leb_size) { ++ ubifs_assert(!(lprops->flags & LPROPS_INDEX)); ++ return LPROPS_EMPTY; ++ } ++ ++ if (lprops->free + lprops->dirty == c->leb_size) { ++ if (lprops->flags & LPROPS_INDEX) ++ return LPROPS_FRDI_IDX; ++ else ++ return LPROPS_FREEABLE; ++ } ++ ++ if (lprops->flags & LPROPS_INDEX) { ++ if (lprops->dirty + lprops->free >= c->min_idx_node_sz) ++ return LPROPS_DIRTY_IDX; ++ } else { ++ if (lprops->dirty >= c->dead_wm && ++ lprops->dirty > lprops->free) ++ return LPROPS_DIRTY; ++ if (lprops->free > 0) ++ return LPROPS_FREE; ++ } ++ ++ return LPROPS_UNCAT; ++} ++ ++/** ++ * change_category - change LEB properties category. ++ * @c: UBIFS file-system description object ++ * @lprops: LEB properties to recategorize ++ * ++ * LEB properties are categorized to enable fast find operations. When the LEB ++ * properties change they must be recategorized. ++ */ ++static void change_category(struct ubifs_info *c, struct ubifs_lprops *lprops) ++{ ++ int old_cat = lprops->flags & LPROPS_CAT_MASK; ++ int new_cat = ubifs_categorize_lprops(c, lprops); ++ ++ if (old_cat == new_cat) { ++ struct ubifs_lpt_heap *heap = &c->lpt_heap[new_cat - 1]; ++ ++ /* lprops on a heap now must be moved up or down */ ++ if (new_cat < 1 || new_cat > LPROPS_HEAP_CNT) ++ return; /* Not on a heap */ ++ heap = &c->lpt_heap[new_cat - 1]; ++ adjust_lpt_heap(c, heap, lprops, lprops->hpos, new_cat); ++ } else { ++ ubifs_remove_from_cat(c, lprops, old_cat); ++ ubifs_add_to_cat(c, lprops, new_cat); ++ } ++} ++ ++/** ++ * calc_dark - calculate LEB dark space size. ++ * @c: the UBIFS file-system description object ++ * @spc: amount of free and dirty space in the LEB ++ * ++ * This function calculates amount of dark space in an LEB which has @spc bytes ++ * of free and dirty space. Returns the calculations result. 
++ *
++ * Dark space is the space which is not always usable - it depends on which
++ * nodes are written in which order. E.g., if an LEB has only 512 free bytes,
++ * it is dark space, because it cannot fit a large data node. So UBIFS cannot
++ * count on this LEB and treat these 512 bytes as usable because it is not true
++ * if, for example, only big chunks of uncompressible data will be written to
++ * the FS.
++ */
++static int calc_dark(struct ubifs_info *c, int spc)
++{
++	ubifs_assert(!(spc & 7));
++
++	if (spc < c->dark_wm)
++		return spc;
++
++	/*
++	 * If we have slightly more space than the dark space watermark, we can
++	 * anyway safely assume that we'll be able to write a node of the
++	 * smallest size there.
++	 */
++	if (spc - c->dark_wm < MIN_WRITE_SZ)
++		return spc - MIN_WRITE_SZ;
++
++	return c->dark_wm;
++}
++
++/**
++ * is_lprops_dirty - determine if LEB properties are dirty.
++ * @c: the UBIFS file-system description object
++ * @lprops: LEB properties to test
++ */
++static int is_lprops_dirty(struct ubifs_info *c, struct ubifs_lprops *lprops)
++{
++	struct ubifs_pnode *pnode;
++	int pos;
++
++	pos = (lprops->lnum - c->main_first) & (UBIFS_LPT_FANOUT - 1);
++	pnode = (struct ubifs_pnode *)container_of(lprops - pos,
++						   struct ubifs_pnode,
++						   lprops[0]);
++	return !test_bit(COW_ZNODE, &pnode->flags) &&
++	       test_bit(DIRTY_CNODE, &pnode->flags);
++}
++
++/**
++ * ubifs_change_lp - change LEB properties.
++ * @c: the UBIFS file-system description object
++ * @lp: LEB properties to change
++ * @free: new free space amount
++ * @dirty: new dirty space amount
++ * @flags: new flags
++ * @idx_gc_cnt: change to the count of idx_gc list
++ *
++ * This function changes LEB properties (@free, @dirty or @flag). However, the
++ * property which has the %LPROPS_NC value is not changed. Returns a pointer to
++ * the updated LEB properties on success and a negative error code on failure.
++ * ++ * Note, the LEB properties may have had to be copied (due to COW) and ++ * consequently the pointer returned may not be the same as the pointer ++ * passed. ++ */ ++const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c, ++ const struct ubifs_lprops *lp, ++ int free, int dirty, int flags, ++ int idx_gc_cnt) ++{ ++ /* ++ * This is the only function that is allowed to change lprops, so we ++ * discard the const qualifier. ++ */ ++ struct ubifs_lprops *lprops = (struct ubifs_lprops *)lp; ++ ++ dbg_lp("LEB %d, free %d, dirty %d, flags %d", ++ lprops->lnum, free, dirty, flags); ++ ++ ubifs_assert(mutex_is_locked(&c->lp_mutex)); ++ ubifs_assert(c->lst.empty_lebs >= 0 && ++ c->lst.empty_lebs <= c->main_lebs); ++ ubifs_assert(c->freeable_cnt >= 0); ++ ubifs_assert(c->freeable_cnt <= c->main_lebs); ++ ubifs_assert(c->lst.taken_empty_lebs >= 0); ++ ubifs_assert(c->lst.taken_empty_lebs <= c->lst.empty_lebs); ++ ubifs_assert(!(c->lst.total_free & 7) && !(c->lst.total_dirty & 7)); ++ ubifs_assert(!(c->lst.total_dead & 7) && !(c->lst.total_dark & 7)); ++ ubifs_assert(!(c->lst.total_used & 7)); ++ ubifs_assert(free == LPROPS_NC || free >= 0); ++ ubifs_assert(dirty == LPROPS_NC || dirty >= 0); ++ ++ if (!is_lprops_dirty(c, lprops)) { ++ lprops = ubifs_lpt_lookup_dirty(c, lprops->lnum); ++ if (IS_ERR(lprops)) ++ return lprops; ++ } else ++ ubifs_assert(lprops == ubifs_lpt_lookup_dirty(c, lprops->lnum)); ++ ++ ubifs_assert(!(lprops->free & 7) && !(lprops->dirty & 7)); ++ ++ spin_lock(&c->space_lock); ++ if ((lprops->flags & LPROPS_TAKEN) && lprops->free == c->leb_size) ++ c->lst.taken_empty_lebs -= 1; ++ ++ if (!(lprops->flags & LPROPS_INDEX)) { ++ int old_spc; ++ ++ old_spc = lprops->free + lprops->dirty; ++ if (old_spc < c->dead_wm) ++ c->lst.total_dead -= old_spc; ++ else ++ c->lst.total_dark -= calc_dark(c, old_spc); ++ ++ c->lst.total_used -= c->leb_size - old_spc; ++ } ++ ++ if (free != LPROPS_NC) { ++ free = ALIGN(free, 8); ++ c->lst.total_free += free - 
lprops->free; ++ ++ /* Increase or decrease empty LEBs counter if needed */ ++ if (free == c->leb_size) { ++ if (lprops->free != c->leb_size) ++ c->lst.empty_lebs += 1; ++ } else if (lprops->free == c->leb_size) ++ c->lst.empty_lebs -= 1; ++ lprops->free = free; ++ } ++ ++ if (dirty != LPROPS_NC) { ++ dirty = ALIGN(dirty, 8); ++ c->lst.total_dirty += dirty - lprops->dirty; ++ lprops->dirty = dirty; ++ } ++ ++ if (flags != LPROPS_NC) { ++ /* Take care about indexing LEBs counter if needed */ ++ if ((lprops->flags & LPROPS_INDEX)) { ++ if (!(flags & LPROPS_INDEX)) ++ c->lst.idx_lebs -= 1; ++ } else if (flags & LPROPS_INDEX) ++ c->lst.idx_lebs += 1; ++ lprops->flags = flags; ++ } ++ ++ if (!(lprops->flags & LPROPS_INDEX)) { ++ int new_spc; ++ ++ new_spc = lprops->free + lprops->dirty; ++ if (new_spc < c->dead_wm) ++ c->lst.total_dead += new_spc; ++ else ++ c->lst.total_dark += calc_dark(c, new_spc); ++ ++ c->lst.total_used += c->leb_size - new_spc; ++ } ++ ++ if ((lprops->flags & LPROPS_TAKEN) && lprops->free == c->leb_size) ++ c->lst.taken_empty_lebs += 1; ++ ++ change_category(c, lprops); ++ c->idx_gc_cnt += idx_gc_cnt; ++ spin_unlock(&c->space_lock); ++ return lprops; ++} ++ ++/** ++ * ubifs_get_lp_stats - get lprops statistics. ++ * @c: UBIFS file-system description object ++ * @lst: return statistics ++ */ ++void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *lst) ++{ ++ spin_lock(&c->space_lock); ++ memcpy(lst, &c->lst, sizeof(struct ubifs_lp_stats)); ++ spin_unlock(&c->space_lock); ++} ++ ++/** ++ * ubifs_change_one_lp - change LEB properties. ++ * @c: the UBIFS file-system description object ++ * @lnum: LEB to change properties for ++ * @free: amount of free space ++ * @dirty: amount of dirty space ++ * @flags_set: flags to set ++ * @flags_clean: flags to clean ++ * @idx_gc_cnt: change to the count of idx_gc list ++ * ++ * This function changes properties of LEB @lnum. 
It is a helper wrapper over ++ * 'ubifs_change_lp()' which hides lprops get/release. The arguments are the ++ * same as in case of 'ubifs_change_lp()'. Returns zero in case of success and ++ * a negative error code in case of failure. ++ */ ++int ubifs_change_one_lp(struct ubifs_info *c, int lnum, int free, int dirty, ++ int flags_set, int flags_clean, int idx_gc_cnt) ++{ ++ int err = 0, flags; ++ const struct ubifs_lprops *lp; ++ ++ ubifs_get_lprops(c); ++ ++ lp = ubifs_lpt_lookup_dirty(c, lnum); ++ if (IS_ERR(lp)) { ++ err = PTR_ERR(lp); ++ goto out; ++ } ++ ++ flags = (lp->flags | flags_set) & ~flags_clean; ++ lp = ubifs_change_lp(c, lp, free, dirty, flags, idx_gc_cnt); ++ if (IS_ERR(lp)) ++ err = PTR_ERR(lp); ++ ++out: ++ ubifs_release_lprops(c); ++ if (err) ++ ubifs_err("cannot change properties of LEB %d, error %d", ++ lnum, err); ++ return err; ++} ++ ++/** ++ * ubifs_update_one_lp - update LEB properties. ++ * @c: the UBIFS file-system description object ++ * @lnum: LEB to change properties for ++ * @free: amount of free space ++ * @dirty: amount of dirty space to add ++ * @flags_set: flags to set ++ * @flags_clean: flags to clean ++ * ++ * This function is the same as 'ubifs_change_one_lp()' but @dirty is added to ++ * the current dirty space rather than replacing it. ++ */ ++int ubifs_update_one_lp(struct ubifs_info *c, int lnum, int free, int dirty, ++ int flags_set, int flags_clean) ++{ ++ int err = 0, flags; ++ const struct ubifs_lprops *lp; ++ ++ ubifs_get_lprops(c); ++ ++ lp = ubifs_lpt_lookup_dirty(c, lnum); ++ if (IS_ERR(lp)) { ++ err = PTR_ERR(lp); ++ goto out; ++ } ++ ++ flags = (lp->flags | flags_set) & ~flags_clean; ++ lp = ubifs_change_lp(c, lp, free, lp->dirty + dirty, flags, 0); ++ if (IS_ERR(lp)) ++ err = PTR_ERR(lp); ++ ++out: ++ ubifs_release_lprops(c); ++ if (err) ++ ubifs_err("cannot update properties of LEB %d, error %d", ++ lnum, err); ++ return err; ++} ++ ++/** ++ * ubifs_read_one_lp - read LEB properties. 
++ * @c: the UBIFS file-system description object ++ * @lnum: LEB to read properties for ++ * @lp: where to store read properties ++ * ++ * This helper function reads properties of a LEB @lnum and stores them in @lp. ++ * Returns zero in case of success and a negative error code in case of ++ * failure. ++ */ ++int ubifs_read_one_lp(struct ubifs_info *c, int lnum, struct ubifs_lprops *lp) ++{ ++ int err = 0; ++ const struct ubifs_lprops *lpp; ++ ++ ubifs_get_lprops(c); ++ ++ lpp = ubifs_lpt_lookup(c, lnum); ++ if (IS_ERR(lpp)) { ++ err = PTR_ERR(lpp); ++ ubifs_err("cannot read properties of LEB %d, error %d", ++ lnum, err); ++ goto out; ++ } ++ ++ memcpy(lp, lpp, sizeof(struct ubifs_lprops)); ++ ++out: ++ ubifs_release_lprops(c); ++ return err; ++} ++ ++/** ++ * ubifs_fast_find_free - try to find a LEB with free space quickly. ++ * @c: the UBIFS file-system description object ++ * ++ * This function returns LEB properties for a LEB with free space or %NULL if ++ * the function is unable to find a LEB quickly. ++ */ ++const struct ubifs_lprops *ubifs_fast_find_free(struct ubifs_info *c) ++{ ++ struct ubifs_lprops *lprops; ++ struct ubifs_lpt_heap *heap; ++ ++ ubifs_assert(mutex_is_locked(&c->lp_mutex)); ++ ++ heap = &c->lpt_heap[LPROPS_FREE - 1]; ++ if (heap->cnt == 0) ++ return NULL; ++ ++ lprops = heap->arr[0]; ++ ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); ++ ubifs_assert(!(lprops->flags & LPROPS_INDEX)); ++ return lprops; ++} ++ ++/** ++ * ubifs_fast_find_empty - try to find an empty LEB quickly. ++ * @c: the UBIFS file-system description object ++ * ++ * This function returns LEB properties for an empty LEB or %NULL if the ++ * function is unable to find an empty LEB quickly. 
++ */ ++const struct ubifs_lprops *ubifs_fast_find_empty(struct ubifs_info *c) ++{ ++ struct ubifs_lprops *lprops; ++ ++ ubifs_assert(mutex_is_locked(&c->lp_mutex)); ++ ++ if (list_empty(&c->empty_list)) ++ return NULL; ++ ++ lprops = list_entry(c->empty_list.next, struct ubifs_lprops, list); ++ ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); ++ ubifs_assert(!(lprops->flags & LPROPS_INDEX)); ++ ubifs_assert(lprops->free == c->leb_size); ++ return lprops; ++} ++ ++/** ++ * ubifs_fast_find_freeable - try to find a freeable LEB quickly. ++ * @c: the UBIFS file-system description object ++ * ++ * This function returns LEB properties for a freeable LEB or %NULL if the ++ * function is unable to find a freeable LEB quickly. ++ */ ++const struct ubifs_lprops *ubifs_fast_find_freeable(struct ubifs_info *c) ++{ ++ struct ubifs_lprops *lprops; ++ ++ ubifs_assert(mutex_is_locked(&c->lp_mutex)); ++ ++ if (list_empty(&c->freeable_list)) ++ return NULL; ++ ++ lprops = list_entry(c->freeable_list.next, struct ubifs_lprops, list); ++ ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); ++ ubifs_assert(!(lprops->flags & LPROPS_INDEX)); ++ ubifs_assert(lprops->free + lprops->dirty == c->leb_size); ++ ubifs_assert(c->freeable_cnt > 0); ++ return lprops; ++} ++ ++/** ++ * ubifs_fast_find_frdi_idx - try to find a freeable index LEB quickly. ++ * @c: the UBIFS file-system description object ++ * ++ * This function returns LEB properties for a freeable index LEB or %NULL if the ++ * function is unable to find a freeable index LEB quickly. 
++ */ ++const struct ubifs_lprops *ubifs_fast_find_frdi_idx(struct ubifs_info *c) ++{ ++ struct ubifs_lprops *lprops; ++ ++ ubifs_assert(mutex_is_locked(&c->lp_mutex)); ++ ++ if (list_empty(&c->frdi_idx_list)) ++ return NULL; ++ ++ lprops = list_entry(c->frdi_idx_list.next, struct ubifs_lprops, list); ++ ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); ++ ubifs_assert((lprops->flags & LPROPS_INDEX)); ++ ubifs_assert(lprops->free + lprops->dirty == c->leb_size); ++ return lprops; ++} ++ ++#ifdef CONFIG_UBIFS_FS_DEBUG ++ ++/** ++ * dbg_check_cats - check category heaps and lists. ++ * @c: UBIFS file-system description object ++ * ++ * This function returns %0 on success and a negative error code on failure. ++ */ ++int dbg_check_cats(struct ubifs_info *c) ++{ ++ struct ubifs_lprops *lprops; ++ struct list_head *pos; ++ int i, cat; ++ ++ if (!(ubifs_chk_flags & (UBIFS_CHK_GEN | UBIFS_CHK_LPROPS))) ++ return 0; ++ ++ list_for_each_entry(lprops, &c->empty_list, list) { ++ if (lprops->free != c->leb_size) { ++ ubifs_err("non-empty LEB %d on empty list " ++ "(free %d dirty %d flags %d)", lprops->lnum, ++ lprops->free, lprops->dirty, lprops->flags); ++ return -EINVAL; ++ } ++ if (lprops->flags & LPROPS_TAKEN) { ++ ubifs_err("taken LEB %d on empty list " ++ "(free %d dirty %d flags %d)", lprops->lnum, ++ lprops->free, lprops->dirty, lprops->flags); ++ return -EINVAL; ++ } ++ } ++ ++ i = 0; ++ list_for_each_entry(lprops, &c->freeable_list, list) { ++ if (lprops->free + lprops->dirty != c->leb_size) { ++ ubifs_err("non-freeable LEB %d on freeable list " ++ "(free %d dirty %d flags %d)", lprops->lnum, ++ lprops->free, lprops->dirty, lprops->flags); ++ return -EINVAL; ++ } ++ if (lprops->flags & LPROPS_TAKEN) { ++ ubifs_err("taken LEB %d on freeable list " ++ "(free %d dirty %d flags %d)", lprops->lnum, ++ lprops->free, lprops->dirty, lprops->flags); ++ return -EINVAL; ++ } ++ i += 1; ++ } ++ if (i != c->freeable_cnt) { ++ ubifs_err("freeable list count %d expected %d", i, ++ 
c->freeable_cnt); ++ return -EINVAL; ++ } ++ ++ i = 0; ++ list_for_each(pos, &c->idx_gc) ++ i += 1; ++ if (i != c->idx_gc_cnt) { ++ ubifs_err("idx_gc list count %d expected %d", i, ++ c->idx_gc_cnt); ++ return -EINVAL; ++ } ++ ++ list_for_each_entry(lprops, &c->frdi_idx_list, list) { ++ if (lprops->free + lprops->dirty != c->leb_size) { ++ ubifs_err("non-freeable LEB %d on frdi_idx list " ++ "(free %d dirty %d flags %d)", lprops->lnum, ++ lprops->free, lprops->dirty, lprops->flags); ++ return -EINVAL; ++ } ++ if (lprops->flags & LPROPS_TAKEN) { ++ ubifs_err("taken LEB %d on frdi_idx list " ++ "(free %d dirty %d flags %d)", lprops->lnum, ++ lprops->free, lprops->dirty, lprops->flags); ++ return -EINVAL; ++ } ++ if (!(lprops->flags & LPROPS_INDEX)) { ++ ubifs_err("non-index LEB %d on frdi_idx list " ++ "(free %d dirty %d flags %d)", lprops->lnum, ++ lprops->free, lprops->dirty, lprops->flags); ++ return -EINVAL; ++ } ++ } ++ ++ for (cat = 1; cat <= LPROPS_HEAP_CNT; cat++) { ++ struct ubifs_lpt_heap *heap = &c->lpt_heap[cat - 1]; ++ ++ for (i = 0; i < heap->cnt; i++) { ++ lprops = heap->arr[i]; ++ if (!lprops) { ++ ubifs_err("null ptr in LPT heap cat %d", cat); ++ return -EINVAL; ++ } ++ if (lprops->hpos != i) { ++ ubifs_err("bad ptr in LPT heap cat %d", cat); ++ return -EINVAL; ++ } ++ if (lprops->flags & LPROPS_TAKEN) { ++ ubifs_err("taken LEB in LPT heap cat %d", cat); ++ return -EINVAL; ++ } ++ } ++ } ++ ++ return 0; ++} ++ ++void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat, ++ int add_pos) ++{ ++ int i = 0, j, err = 0; ++ ++ if (!(ubifs_chk_flags & (UBIFS_CHK_GEN | UBIFS_CHK_LPROPS))) ++ return; ++ ++ for (i = 0; i < heap->cnt; i++) { ++ struct ubifs_lprops *lprops = heap->arr[i]; ++ struct ubifs_lprops *lp; ++ ++ if (i != add_pos) ++ if ((lprops->flags & LPROPS_CAT_MASK) != cat) { ++ err = 1; ++ goto out; ++ } ++ if (lprops->hpos != i) { ++ err = 2; ++ goto out; ++ } ++ lp = ubifs_lpt_lookup(c, lprops->lnum); ++ if (IS_ERR(lp)) { ++ 
err = 3; ++ goto out; ++ } ++ if (lprops != lp) { ++ dbg_msg("lprops %zx lp %zx lprops->lnum %d lp->lnum %d", ++ (size_t)lprops, (size_t)lp, lprops->lnum, ++ lp->lnum); ++ err = 4; ++ goto out; ++ } ++ for (j = 0; j < i; j++) { ++ lp = heap->arr[j]; ++ if (lp == lprops) { ++ err = 5; ++ goto out; ++ } ++ if (lp->lnum == lprops->lnum) { ++ err = 6; ++ goto out; ++ } ++ } ++ } ++out: ++ if (err) { ++ dbg_msg("failed cat %d hpos %d err %d", cat, i, err); ++ dbg_dump_stack(); ++ dbg_dump_heap(c, heap, cat); ++ } ++} ++ ++/** ++ * struct scan_check_data - data provided to scan callback function. ++ * @lst: LEB properties statistics ++ * @err: error code ++ */ ++struct scan_check_data { ++ struct ubifs_lp_stats lst; ++ int err; ++}; ++ ++/** ++ * scan_check_cb - scan callback. ++ * @c: the UBIFS file-system description object ++ * @lp: LEB properties to scan ++ * @in_tree: whether the LEB properties are in main memory ++ * @data: information passed to and from the caller of the scan ++ * ++ * This function returns a code that indicates whether the scan should continue ++ * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree ++ * in main memory (%LPT_SCAN_ADD), or whether the scan should stop ++ * (%LPT_SCAN_STOP). 
++ */ ++static int scan_check_cb(struct ubifs_info *c, ++ const struct ubifs_lprops *lp, int in_tree, ++ struct scan_check_data *data) ++{ ++ struct ubifs_scan_leb *sleb; ++ struct ubifs_scan_node *snod; ++ struct ubifs_lp_stats *lst = &data->lst; ++ int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty; ++ ++ cat = lp->flags & LPROPS_CAT_MASK; ++ if (cat != LPROPS_UNCAT) { ++ cat = ubifs_categorize_lprops(c, lp); ++ if (cat != (lp->flags & LPROPS_CAT_MASK)) { ++ ubifs_err("bad LEB category %d expected %d", ++ (lp->flags & LPROPS_CAT_MASK), cat); ++ goto out; ++ } ++ } ++ ++ /* Check lp is on its category list (if it has one) */ ++ if (in_tree) { ++ struct list_head *list = NULL; ++ ++ switch (cat) { ++ case LPROPS_EMPTY: ++ list = &c->empty_list; ++ break; ++ case LPROPS_FREEABLE: ++ list = &c->freeable_list; ++ break; ++ case LPROPS_FRDI_IDX: ++ list = &c->frdi_idx_list; ++ break; ++ case LPROPS_UNCAT: ++ list = &c->uncat_list; ++ break; ++ } ++ if (list) { ++ struct ubifs_lprops *lprops; ++ int found = 0; ++ ++ list_for_each_entry(lprops, list, list) { ++ if (lprops == lp) { ++ found = 1; ++ break; ++ } ++ } ++ if (!found) { ++ ubifs_err("bad LPT list (category %d)", cat); ++ goto out; ++ } ++ } ++ } ++ ++ /* Check lp is on its category heap (if it has one) */ ++ if (in_tree && cat > 0 && cat <= LPROPS_HEAP_CNT) { ++ struct ubifs_lpt_heap *heap = &c->lpt_heap[cat - 1]; ++ ++ if ((lp->hpos != -1 && heap->arr[lp->hpos]->lnum != lnum) || ++ lp != heap->arr[lp->hpos]) { ++ ubifs_err("bad LPT heap (category %d)", cat); ++ goto out; ++ } ++ } ++ ++ sleb = ubifs_scan(c, lnum, 0, c->dbg->buf); ++ if (IS_ERR(sleb)) { ++ /* ++ * After an unclean unmount, empty and freeable LEBs ++ * may contain garbage. 
++ */ ++ if (lp->free == c->leb_size) { ++ ubifs_err("scan errors were in empty LEB " ++ "- continuing checking"); ++ lst->empty_lebs += 1; ++ lst->total_free += c->leb_size; ++ lst->total_dark += calc_dark(c, c->leb_size); ++ return LPT_SCAN_CONTINUE; ++ } ++ ++ if (lp->free + lp->dirty == c->leb_size && ++ !(lp->flags & LPROPS_INDEX)) { ++ ubifs_err("scan errors were in freeable LEB " ++ "- continuing checking"); ++ lst->total_free += lp->free; ++ lst->total_dirty += lp->dirty; ++ lst->total_dark += calc_dark(c, c->leb_size); ++ return LPT_SCAN_CONTINUE; ++ } ++ data->err = PTR_ERR(sleb); ++ return LPT_SCAN_STOP; ++ } ++ ++ is_idx = -1; ++ list_for_each_entry(snod, &sleb->nodes, list) { ++ int found, level = 0; ++ ++ cond_resched(); ++ ++ if (is_idx == -1) ++ is_idx = (snod->type == UBIFS_IDX_NODE) ? 1 : 0; ++ ++ if (is_idx && snod->type != UBIFS_IDX_NODE) { ++ ubifs_err("indexing node in data LEB %d:%d", ++ lnum, snod->offs); ++ goto out_destroy; ++ } ++ ++ if (snod->type == UBIFS_IDX_NODE) { ++ struct ubifs_idx_node *idx = snod->node; ++ ++ key_read(c, ubifs_idx_key(c, idx), &snod->key); ++ level = le16_to_cpu(idx->level); ++ } ++ ++ found = ubifs_tnc_has_node(c, &snod->key, level, lnum, ++ snod->offs, is_idx); ++ if (found) { ++ if (found < 0) ++ goto out_destroy; ++ used += ALIGN(snod->len, 8); ++ } ++ } ++ ++ free = c->leb_size - sleb->endpt; ++ dirty = sleb->endpt - used; ++ ++ if (free > c->leb_size || free < 0 || dirty > c->leb_size || ++ dirty < 0) { ++ ubifs_err("bad calculated accounting for LEB %d: " ++ "free %d, dirty %d", lnum, free, dirty); ++ goto out_destroy; ++ } ++ ++ if (lp->free + lp->dirty == c->leb_size && ++ free + dirty == c->leb_size) ++ if ((is_idx && !(lp->flags & LPROPS_INDEX)) || ++ (!is_idx && free == c->leb_size) || ++ lp->free == c->leb_size) { ++ /* ++ * Empty or freeable LEBs could contain index ++ * nodes from an uncompleted commit due to an ++ * unclean unmount. Or they could be empty for ++ * the same reason. 
Or it may simply not have been ++ * unmapped. ++ */ ++ free = lp->free; ++ dirty = lp->dirty; ++ is_idx = 0; ++ } ++ ++ if (is_idx && lp->free + lp->dirty == free + dirty && ++ lnum != c->ihead_lnum) { ++ /* ++ * After an unclean unmount, an index LEB could have a different ++ * amount of free space than the value recorded by lprops. That ++ * is because the in-the-gaps method may use free space or ++ * create free space (as a side-effect of using ubi_leb_change ++ * and not writing the whole LEB). The incorrect free space ++ * value is not a problem because the index is only ever ++ * allocated empty LEBs, so there will never be an attempt to ++ * write to the free space at the end of an index LEB - except ++ * by the in-the-gaps method for which it is not a problem. ++ */ ++ free = lp->free; ++ dirty = lp->dirty; ++ } ++ ++ if (lp->free != free || lp->dirty != dirty) ++ goto out_print; ++ ++ if (is_idx && !(lp->flags & LPROPS_INDEX)) { ++ if (free == c->leb_size) ++ /* Free but not unmapped LEB, it's fine */ ++ is_idx = 0; ++ else { ++ ubifs_err("indexing node without indexing " ++ "flag"); ++ goto out_print; ++ } ++ } ++ ++ if (!is_idx && (lp->flags & LPROPS_INDEX)) { ++ ubifs_err("data node with indexing flag"); ++ goto out_print; ++ } ++ ++ if (free == c->leb_size) ++ lst->empty_lebs += 1; ++ ++ if (is_idx) ++ lst->idx_lebs += 1; ++ ++ if (!(lp->flags & LPROPS_INDEX)) ++ lst->total_used += c->leb_size - free - dirty; ++ lst->total_free += free; ++ lst->total_dirty += dirty; ++ ++ if (!(lp->flags & LPROPS_INDEX)) { ++ int spc = free + dirty; ++ ++ if (spc < c->dead_wm) ++ lst->total_dead += spc; ++ else ++ lst->total_dark += calc_dark(c, spc); ++ } ++ ++ ubifs_scan_destroy(sleb); ++ return LPT_SCAN_CONTINUE; ++ ++out_print: ++ ubifs_err("bad accounting of LEB %d: free %d, dirty %d flags %#x, " ++ "should be free %d, dirty %d", ++ lnum, lp->free, lp->dirty, lp->flags, free, dirty); ++ dbg_dump_leb(c, lnum); ++out_destroy: ++ ubifs_scan_destroy(sleb); ++out: ++ 
data->err = -EINVAL; ++ return LPT_SCAN_STOP; ++} ++ ++/** ++ * dbg_check_lprops - check all LEB properties. ++ * @c: UBIFS file-system description object ++ * ++ * This function checks all LEB properties and makes sure they are all correct. ++ * It returns zero if everything is fine, %-EINVAL if there is an inconsistency ++ * and other negative error codes in case of other errors. This function is ++ * called while the file system is locked (because of commit start), so no ++ * additional locking is required. Note that locking the LPT mutex would cause ++ * a circular lock dependency with the TNC mutex. ++ */ ++int dbg_check_lprops(struct ubifs_info *c) ++{ ++ int i, err; ++ struct scan_check_data data; ++ struct ubifs_lp_stats *lst = &data.lst; ++ ++ if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) ++ return 0; ++ ++ /* ++ * As we are going to scan the media, the write buffers have to be ++ * synchronized. ++ */ ++ for (i = 0; i < c->jhead_cnt; i++) { ++ err = ubifs_wbuf_sync(&c->jheads[i].wbuf); ++ if (err) ++ return err; ++ } ++ ++ memset(lst, 0, sizeof(struct ubifs_lp_stats)); ++ ++ data.err = 0; ++ err = ubifs_lpt_scan_nolock(c, c->main_first, c->leb_cnt - 1, ++ (ubifs_lpt_scan_callback)scan_check_cb, ++ &data); ++ if (err && err != -ENOSPC) ++ goto out; ++ if (data.err) { ++ err = data.err; ++ goto out; ++ } ++ ++ if (lst->empty_lebs != c->lst.empty_lebs || ++ lst->idx_lebs != c->lst.idx_lebs || ++ lst->total_free != c->lst.total_free || ++ lst->total_dirty != c->lst.total_dirty || ++ lst->total_used != c->lst.total_used) { ++ ubifs_err("bad overall accounting"); ++ ubifs_err("calculated: empty_lebs %d, idx_lebs %d, " ++ "total_free %lld, total_dirty %lld, total_used %lld", ++ lst->empty_lebs, lst->idx_lebs, lst->total_free, ++ lst->total_dirty, lst->total_used); ++ ubifs_err("read from lprops: empty_lebs %d, idx_lebs %d, " ++ "total_free %lld, total_dirty %lld, total_used %lld", ++ c->lst.empty_lebs, c->lst.idx_lebs, c->lst.total_free, ++ c->lst.total_dirty, 
c->lst.total_used); ++ err = -EINVAL; ++ goto out; ++ } ++ ++ if (lst->total_dead != c->lst.total_dead || ++ lst->total_dark != c->lst.total_dark) { ++ ubifs_err("bad dead/dark space accounting"); ++ ubifs_err("calculated: total_dead %lld, total_dark %lld", ++ lst->total_dead, lst->total_dark); ++ ubifs_err("read from lprops: total_dead %lld, total_dark %lld", ++ c->lst.total_dead, c->lst.total_dark); ++ err = -EINVAL; ++ goto out; ++ } ++ ++ err = dbg_check_cats(c); ++out: ++ return err; ++} ++ ++#endif /* CONFIG_UBIFS_FS_DEBUG */ +diff -Nurd linux-2.6.24/fs/ubifs/lpt.c ubifs-v2.6.24/fs/ubifs/lpt.c +--- linux-2.6.24/fs/ubifs/lpt.c 1970-01-01 02:00:00.000000000 +0200 ++++ ubifs-v2.6.24/fs/ubifs/lpt.c 2009-04-07 17:14:47.000000000 +0200 +@@ -0,0 +1,2271 @@ ++/* ++ * This file is part of UBIFS. ++ * ++ * Copyright (C) 2006-2008 Nokia Corporation. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published by ++ * the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, write to the Free Software Foundation, Inc., 51 ++ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ * ++ * Authors: Adrian Hunter ++ * Artem Bityutskiy (Битюцкий Артём) ++ */ ++ ++/* ++ * This file implements the LEB properties tree (LPT) area. The LPT area ++ * contains the LEB properties tree, a table of LPT area eraseblocks (ltab), and ++ * (for the "big" model) a table of saved LEB numbers (lsave). The LPT area sits ++ * between the log and the orphan area. ++ * ++ * The LPT area is like a miniature self-contained file system. 
It is required ++ * that it never runs out of space, is fast to access and update, and scales ++ * logarithmically. The LEB properties tree is implemented as a wandering tree ++ * much like the TNC, and the LPT area has its own garbage collection. ++ * ++ * The LPT has two slightly different forms called the "small model" and the ++ * "big model". The small model is used when the entire LEB properties table ++ * can be written into a single eraseblock. In that case, garbage collection ++ * consists of just writing the whole table, which therefore makes all other ++ * eraseblocks reusable. In the case of the big model, dirty eraseblocks are ++ * selected for garbage collection, which consists of marking the clean nodes in ++ * that LEB as dirty, and then only the dirty nodes are written out. Also, in ++ * the case of the big model, a table of LEB numbers is saved so that the entire ++ * LPT does not need to be scanned looking for empty eraseblocks when UBIFS is ++ * first mounted. ++ */ ++ ++#include "ubifs.h" ++#include <linux/crc16.h> ++ ++/** ++ * do_calc_lpt_geom - calculate sizes for the LPT area. ++ * @c: the UBIFS file-system description object ++ * ++ * Calculate the sizes of LPT bit fields, nodes, and tree, based on the ++ * properties of the flash and whether LPT is "big" (c->big_lpt). 
++ */ ++static void do_calc_lpt_geom(struct ubifs_info *c) ++{ ++ int i, n, bits, per_leb_wastage, max_pnode_cnt; ++ long long sz, tot_wastage; ++ ++ n = c->main_lebs + c->max_leb_cnt - c->leb_cnt; ++ max_pnode_cnt = DIV_ROUND_UP(n, UBIFS_LPT_FANOUT); ++ ++ c->lpt_hght = 1; ++ n = UBIFS_LPT_FANOUT; ++ while (n < max_pnode_cnt) { ++ c->lpt_hght += 1; ++ n <<= UBIFS_LPT_FANOUT_SHIFT; ++ } ++ ++ c->pnode_cnt = DIV_ROUND_UP(c->main_lebs, UBIFS_LPT_FANOUT); ++ ++ n = DIV_ROUND_UP(c->pnode_cnt, UBIFS_LPT_FANOUT); ++ c->nnode_cnt = n; ++ for (i = 1; i < c->lpt_hght; i++) { ++ n = DIV_ROUND_UP(n, UBIFS_LPT_FANOUT); ++ c->nnode_cnt += n; ++ } ++ ++ c->space_bits = fls(c->leb_size) - 3; ++ c->lpt_lnum_bits = fls(c->lpt_lebs); ++ c->lpt_offs_bits = fls(c->leb_size - 1); ++ c->lpt_spc_bits = fls(c->leb_size); ++ ++ n = DIV_ROUND_UP(c->max_leb_cnt, UBIFS_LPT_FANOUT); ++ c->pcnt_bits = fls(n - 1); ++ ++ c->lnum_bits = fls(c->max_leb_cnt - 1); ++ ++ bits = UBIFS_LPT_CRC_BITS + UBIFS_LPT_TYPE_BITS + ++ (c->big_lpt ? c->pcnt_bits : 0) + ++ (c->space_bits * 2 + 1) * UBIFS_LPT_FANOUT; ++ c->pnode_sz = (bits + 7) / 8; ++ ++ bits = UBIFS_LPT_CRC_BITS + UBIFS_LPT_TYPE_BITS + ++ (c->big_lpt ? 
c->pcnt_bits : 0) + ++ (c->lpt_lnum_bits + c->lpt_offs_bits) * UBIFS_LPT_FANOUT; ++ c->nnode_sz = (bits + 7) / 8; ++ ++ bits = UBIFS_LPT_CRC_BITS + UBIFS_LPT_TYPE_BITS + ++ c->lpt_lebs * c->lpt_spc_bits * 2; ++ c->ltab_sz = (bits + 7) / 8; ++ ++ bits = UBIFS_LPT_CRC_BITS + UBIFS_LPT_TYPE_BITS + ++ c->lnum_bits * c->lsave_cnt; ++ c->lsave_sz = (bits + 7) / 8; ++ ++ /* Calculate the minimum LPT size */ ++ c->lpt_sz = (long long)c->pnode_cnt * c->pnode_sz; ++ c->lpt_sz += (long long)c->nnode_cnt * c->nnode_sz; ++ c->lpt_sz += c->ltab_sz; ++ if (c->big_lpt) ++ c->lpt_sz += c->lsave_sz; ++ ++ /* Add wastage */ ++ sz = c->lpt_sz; ++ per_leb_wastage = max_t(int, c->pnode_sz, c->nnode_sz); ++ sz += per_leb_wastage; ++ tot_wastage = per_leb_wastage; ++ while (sz > c->leb_size) { ++ sz += per_leb_wastage; ++ sz -= c->leb_size; ++ tot_wastage += per_leb_wastage; ++ } ++ tot_wastage += ALIGN(sz, c->min_io_size) - sz; ++ c->lpt_sz += tot_wastage; ++} ++ ++/** ++ * ubifs_calc_lpt_geom - calculate and check sizes for the LPT area. ++ * @c: the UBIFS file-system description object ++ * ++ * This function returns %0 on success and a negative error code on failure. ++ */ ++int ubifs_calc_lpt_geom(struct ubifs_info *c) ++{ ++ int lebs_needed; ++ long long sz; ++ ++ do_calc_lpt_geom(c); ++ ++ /* Verify that lpt_lebs is big enough */ ++ sz = c->lpt_sz * 2; /* Must have at least 2 times the size */ ++ lebs_needed = div_u64(sz + c->leb_size - 1, c->leb_size); ++ if (lebs_needed > c->lpt_lebs) { ++ ubifs_err("too few LPT LEBs"); ++ return -EINVAL; ++ } ++ ++ /* Verify that ltab fits in a single LEB (since ltab is a single node */ ++ if (c->ltab_sz > c->leb_size) { ++ ubifs_err("LPT ltab too big"); ++ return -EINVAL; ++ } ++ ++ c->check_lpt_free = c->big_lpt; ++ return 0; ++} ++ ++/** ++ * calc_dflt_lpt_geom - calculate default LPT geometry. 
++ * @c: the UBIFS file-system description object ++ * @main_lebs: number of main area LEBs is passed and returned here ++ * @big_lpt: whether the LPT area is "big" is returned here ++ * ++ * The size of the LPT area depends on parameters that themselves are dependent ++ * on the size of the LPT area. This function successively recalculates the LPT ++ * area geometry until the parameters and resultant geometry are consistent. ++ * ++ * This function returns %0 on success and a negative error code on failure. ++ */ ++static int calc_dflt_lpt_geom(struct ubifs_info *c, int *main_lebs, ++ int *big_lpt) ++{ ++ int i, lebs_needed; ++ long long sz; ++ ++ /* Start by assuming the minimum number of LPT LEBs */ ++ c->lpt_lebs = UBIFS_MIN_LPT_LEBS; ++ c->main_lebs = *main_lebs - c->lpt_lebs; ++ if (c->main_lebs <= 0) ++ return -EINVAL; ++ ++ /* And assume we will use the small LPT model */ ++ c->big_lpt = 0; ++ ++ /* ++ * Calculate the geometry based on assumptions above and then see if it ++ * makes sense ++ */ ++ do_calc_lpt_geom(c); ++ ++ /* Small LPT model must have lpt_sz < leb_size */ ++ if (c->lpt_sz > c->leb_size) { ++ /* Nope, so try again using big LPT model */ ++ c->big_lpt = 1; ++ do_calc_lpt_geom(c); ++ } ++ ++ /* Now check there are enough LPT LEBs */ ++ for (i = 0; i < 64 ; i++) { ++ sz = c->lpt_sz * 4; /* Allow 4 times the size */ ++ lebs_needed = div_u64(sz + c->leb_size - 1, c->leb_size); ++ if (lebs_needed > c->lpt_lebs) { ++ /* Not enough LPT LEBs so try again with more */ ++ c->lpt_lebs = lebs_needed; ++ c->main_lebs = *main_lebs - c->lpt_lebs; ++ if (c->main_lebs <= 0) ++ return -EINVAL; ++ do_calc_lpt_geom(c); ++ continue; ++ } ++ if (c->ltab_sz > c->leb_size) { ++ ubifs_err("LPT ltab too big"); ++ return -EINVAL; ++ } ++ *main_lebs = c->main_lebs; ++ *big_lpt = c->big_lpt; ++ return 0; ++ } ++ return -EINVAL; ++} ++ ++/** ++ * pack_bits - pack bit fields end-to-end. 
++ * @addr: address at which to pack (passed and next address returned) ++ * @pos: bit position at which to pack (passed and next position returned) ++ * @val: value to pack ++ * @nrbits: number of bits of value to pack (1-32) ++ */ ++static void pack_bits(uint8_t **addr, int *pos, uint32_t val, int nrbits) ++{ ++ uint8_t *p = *addr; ++ int b = *pos; ++ ++ ubifs_assert(nrbits > 0); ++ ubifs_assert(nrbits <= 32); ++ ubifs_assert(*pos >= 0); ++ ubifs_assert(*pos < 8); ++ ubifs_assert((val >> nrbits) == 0 || nrbits == 32); ++ if (b) { ++ *p |= ((uint8_t)val) << b; ++ nrbits += b; ++ if (nrbits > 8) { ++ *++p = (uint8_t)(val >>= (8 - b)); ++ if (nrbits > 16) { ++ *++p = (uint8_t)(val >>= 8); ++ if (nrbits > 24) { ++ *++p = (uint8_t)(val >>= 8); ++ if (nrbits > 32) ++ *++p = (uint8_t)(val >>= 8); ++ } ++ } ++ } ++ } else { ++ *p = (uint8_t)val; ++ if (nrbits > 8) { ++ *++p = (uint8_t)(val >>= 8); ++ if (nrbits > 16) { ++ *++p = (uint8_t)(val >>= 8); ++ if (nrbits > 24) ++ *++p = (uint8_t)(val >>= 8); ++ } ++ } ++ } ++ b = nrbits & 7; ++ if (b == 0) ++ p++; ++ *addr = p; ++ *pos = b; ++} ++ ++/** ++ * ubifs_unpack_bits - unpack bit fields. ++ * @addr: address at which to unpack (passed and next address returned) ++ * @pos: bit position at which to unpack (passed and next position returned) ++ * @nrbits: number of bits of value to unpack (1-32) ++ * ++ * This function returns the value unpacked. 
++ */ ++uint32_t ubifs_unpack_bits(uint8_t **addr, int *pos, int nrbits) ++{ ++ const int k = 32 - nrbits; ++ uint8_t *p = *addr; ++ int b = *pos; ++ uint32_t uninitialized_var(val); ++ const int bytes = (nrbits + b + 7) >> 3; ++ ++ ubifs_assert(nrbits > 0); ++ ubifs_assert(nrbits <= 32); ++ ubifs_assert(*pos >= 0); ++ ubifs_assert(*pos < 8); ++ if (b) { ++ switch (bytes) { ++ case 2: ++ val = p[1]; ++ break; ++ case 3: ++ val = p[1] | ((uint32_t)p[2] << 8); ++ break; ++ case 4: ++ val = p[1] | ((uint32_t)p[2] << 8) | ++ ((uint32_t)p[3] << 16); ++ break; ++ case 5: ++ val = p[1] | ((uint32_t)p[2] << 8) | ++ ((uint32_t)p[3] << 16) | ++ ((uint32_t)p[4] << 24); ++ } ++ val <<= (8 - b); ++ val |= *p >> b; ++ nrbits += b; ++ } else { ++ switch (bytes) { ++ case 1: ++ val = p[0]; ++ break; ++ case 2: ++ val = p[0] | ((uint32_t)p[1] << 8); ++ break; ++ case 3: ++ val = p[0] | ((uint32_t)p[1] << 8) | ++ ((uint32_t)p[2] << 16); ++ break; ++ case 4: ++ val = p[0] | ((uint32_t)p[1] << 8) | ++ ((uint32_t)p[2] << 16) | ++ ((uint32_t)p[3] << 24); ++ break; ++ } ++ } ++ val <<= k; ++ val >>= k; ++ b = nrbits & 7; ++ p += nrbits >> 3; ++ *addr = p; ++ *pos = b; ++ ubifs_assert((val >> nrbits) == 0 || nrbits - b == 32); ++ return val; ++} ++ ++/** ++ * ubifs_pack_pnode - pack all the bit fields of a pnode. 
++ * @c: UBIFS file-system description object ++ * @buf: buffer into which to pack ++ * @pnode: pnode to pack ++ */ ++void ubifs_pack_pnode(struct ubifs_info *c, void *buf, ++ struct ubifs_pnode *pnode) ++{ ++ uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; ++ int i, pos = 0; ++ uint16_t crc; ++ ++ pack_bits(&addr, &pos, UBIFS_LPT_PNODE, UBIFS_LPT_TYPE_BITS); ++ if (c->big_lpt) ++ pack_bits(&addr, &pos, pnode->num, c->pcnt_bits); ++ for (i = 0; i < UBIFS_LPT_FANOUT; i++) { ++ pack_bits(&addr, &pos, pnode->lprops[i].free >> 3, ++ c->space_bits); ++ pack_bits(&addr, &pos, pnode->lprops[i].dirty >> 3, ++ c->space_bits); ++ if (pnode->lprops[i].flags & LPROPS_INDEX) ++ pack_bits(&addr, &pos, 1, 1); ++ else ++ pack_bits(&addr, &pos, 0, 1); ++ } ++ crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES, ++ c->pnode_sz - UBIFS_LPT_CRC_BYTES); ++ addr = buf; ++ pos = 0; ++ pack_bits(&addr, &pos, crc, UBIFS_LPT_CRC_BITS); ++} ++ ++/** ++ * ubifs_pack_nnode - pack all the bit fields of a nnode. ++ * @c: UBIFS file-system description object ++ * @buf: buffer into which to pack ++ * @nnode: nnode to pack ++ */ ++void ubifs_pack_nnode(struct ubifs_info *c, void *buf, ++ struct ubifs_nnode *nnode) ++{ ++ uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; ++ int i, pos = 0; ++ uint16_t crc; ++ ++ pack_bits(&addr, &pos, UBIFS_LPT_NNODE, UBIFS_LPT_TYPE_BITS); ++ if (c->big_lpt) ++ pack_bits(&addr, &pos, nnode->num, c->pcnt_bits); ++ for (i = 0; i < UBIFS_LPT_FANOUT; i++) { ++ int lnum = nnode->nbranch[i].lnum; ++ ++ if (lnum == 0) ++ lnum = c->lpt_last + 1; ++ pack_bits(&addr, &pos, lnum - c->lpt_first, c->lpt_lnum_bits); ++ pack_bits(&addr, &pos, nnode->nbranch[i].offs, ++ c->lpt_offs_bits); ++ } ++ crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES, ++ c->nnode_sz - UBIFS_LPT_CRC_BYTES); ++ addr = buf; ++ pos = 0; ++ pack_bits(&addr, &pos, crc, UBIFS_LPT_CRC_BITS); ++} ++ ++/** ++ * ubifs_pack_ltab - pack the LPT's own lprops table. 
++ * @c: UBIFS file-system description object ++ * @buf: buffer into which to pack ++ * @ltab: LPT's own lprops table to pack ++ */ ++void ubifs_pack_ltab(struct ubifs_info *c, void *buf, ++ struct ubifs_lpt_lprops *ltab) ++{ ++ uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; ++ int i, pos = 0; ++ uint16_t crc; ++ ++ pack_bits(&addr, &pos, UBIFS_LPT_LTAB, UBIFS_LPT_TYPE_BITS); ++ for (i = 0; i < c->lpt_lebs; i++) { ++ pack_bits(&addr, &pos, ltab[i].free, c->lpt_spc_bits); ++ pack_bits(&addr, &pos, ltab[i].dirty, c->lpt_spc_bits); ++ } ++ crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES, ++ c->ltab_sz - UBIFS_LPT_CRC_BYTES); ++ addr = buf; ++ pos = 0; ++ pack_bits(&addr, &pos, crc, UBIFS_LPT_CRC_BITS); ++} ++ ++/** ++ * ubifs_pack_lsave - pack the LPT's save table. ++ * @c: UBIFS file-system description object ++ * @buf: buffer into which to pack ++ * @lsave: LPT's save table to pack ++ */ ++void ubifs_pack_lsave(struct ubifs_info *c, void *buf, int *lsave) ++{ ++ uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; ++ int i, pos = 0; ++ uint16_t crc; ++ ++ pack_bits(&addr, &pos, UBIFS_LPT_LSAVE, UBIFS_LPT_TYPE_BITS); ++ for (i = 0; i < c->lsave_cnt; i++) ++ pack_bits(&addr, &pos, lsave[i], c->lnum_bits); ++ crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES, ++ c->lsave_sz - UBIFS_LPT_CRC_BYTES); ++ addr = buf; ++ pos = 0; ++ pack_bits(&addr, &pos, crc, UBIFS_LPT_CRC_BITS); ++} ++ ++/** ++ * ubifs_add_lpt_dirt - add dirty space to LPT LEB properties. ++ * @c: UBIFS file-system description object ++ * @lnum: LEB number to which to add dirty space ++ * @dirty: amount of dirty space to add ++ */ ++void ubifs_add_lpt_dirt(struct ubifs_info *c, int lnum, int dirty) ++{ ++ if (!dirty || !lnum) ++ return; ++ dbg_lp("LEB %d add %d to %d", ++ lnum, dirty, c->ltab[lnum - c->lpt_first].dirty); ++ ubifs_assert(lnum >= c->lpt_first && lnum <= c->lpt_last); ++ c->ltab[lnum - c->lpt_first].dirty += dirty; ++} ++ ++/** ++ * set_ltab - set LPT LEB properties. 
++ * @c: UBIFS file-system description object ++ * @lnum: LEB number ++ * @free: amount of free space ++ * @dirty: amount of dirty space ++ */ ++static void set_ltab(struct ubifs_info *c, int lnum, int free, int dirty) ++{ ++ dbg_lp("LEB %d free %d dirty %d to %d %d", ++ lnum, c->ltab[lnum - c->lpt_first].free, ++ c->ltab[lnum - c->lpt_first].dirty, free, dirty); ++ ubifs_assert(lnum >= c->lpt_first && lnum <= c->lpt_last); ++ c->ltab[lnum - c->lpt_first].free = free; ++ c->ltab[lnum - c->lpt_first].dirty = dirty; ++} ++ ++/** ++ * ubifs_add_nnode_dirt - add dirty space to LPT LEB properties. ++ * @c: UBIFS file-system description object ++ * @nnode: nnode for which to add dirt ++ */ ++void ubifs_add_nnode_dirt(struct ubifs_info *c, struct ubifs_nnode *nnode) ++{ ++ struct ubifs_nnode *np = nnode->parent; ++ ++ if (np) ++ ubifs_add_lpt_dirt(c, np->nbranch[nnode->iip].lnum, ++ c->nnode_sz); ++ else { ++ ubifs_add_lpt_dirt(c, c->lpt_lnum, c->nnode_sz); ++ if (!(c->lpt_drty_flgs & LTAB_DIRTY)) { ++ c->lpt_drty_flgs |= LTAB_DIRTY; ++ ubifs_add_lpt_dirt(c, c->ltab_lnum, c->ltab_sz); ++ } ++ } ++} ++ ++/** ++ * add_pnode_dirt - add dirty space to LPT LEB properties. ++ * @c: UBIFS file-system description object ++ * @pnode: pnode for which to add dirt ++ */ ++static void add_pnode_dirt(struct ubifs_info *c, struct ubifs_pnode *pnode) ++{ ++ ubifs_add_lpt_dirt(c, pnode->parent->nbranch[pnode->iip].lnum, ++ c->pnode_sz); ++} ++ ++/** ++ * calc_nnode_num - calculate nnode number. ++ * @row: the row in the tree (root is zero) ++ * @col: the column in the row (leftmost is zero) ++ * ++ * The nnode number is a number that uniquely identifies a nnode and can be used ++ * easily to traverse the tree from the root to that nnode. ++ * ++ * This function calculates and returns the nnode number for the nnode at @row ++ * and @col. 
++ */ ++static int calc_nnode_num(int row, int col) ++{ ++ int num, bits; ++ ++ num = 1; ++ while (row--) { ++ bits = (col & (UBIFS_LPT_FANOUT - 1)); ++ col >>= UBIFS_LPT_FANOUT_SHIFT; ++ num <<= UBIFS_LPT_FANOUT_SHIFT; ++ num |= bits; ++ } ++ return num; ++} ++ ++/** ++ * calc_nnode_num_from_parent - calculate nnode number. ++ * @c: UBIFS file-system description object ++ * @parent: parent nnode ++ * @iip: index in parent ++ * ++ * The nnode number is a number that uniquely identifies a nnode and can be used ++ * easily to traverse the tree from the root to that nnode. ++ * ++ * This function calculates and returns the nnode number based on the parent's ++ * nnode number and the index in parent. ++ */ ++static int calc_nnode_num_from_parent(const struct ubifs_info *c, ++ struct ubifs_nnode *parent, int iip) ++{ ++ int num, shft; ++ ++ if (!parent) ++ return 1; ++ shft = (c->lpt_hght - parent->level) * UBIFS_LPT_FANOUT_SHIFT; ++ num = parent->num ^ (1 << shft); ++ num |= (UBIFS_LPT_FANOUT + iip) << shft; ++ return num; ++} ++ ++/** ++ * calc_pnode_num_from_parent - calculate pnode number. ++ * @c: UBIFS file-system description object ++ * @parent: parent nnode ++ * @iip: index in parent ++ * ++ * The pnode number is a number that uniquely identifies a pnode and can be used ++ * easily to traverse the tree from the root to that pnode. ++ * ++ * This function calculates and returns the pnode number based on the parent's ++ * nnode number and the index in parent. ++ */ ++static int calc_pnode_num_from_parent(const struct ubifs_info *c, ++ struct ubifs_nnode *parent, int iip) ++{ ++ int i, n = c->lpt_hght - 1, pnum = parent->num, num = 0; ++ ++ for (i = 0; i < n; i++) { ++ num <<= UBIFS_LPT_FANOUT_SHIFT; ++ num |= pnum & (UBIFS_LPT_FANOUT - 1); ++ pnum >>= UBIFS_LPT_FANOUT_SHIFT; ++ } ++ num <<= UBIFS_LPT_FANOUT_SHIFT; ++ num |= iip; ++ return num; ++} ++ ++/** ++ * ubifs_create_dflt_lpt - create default LPT. 
++ * @c: UBIFS file-system description object ++ * @main_lebs: number of main area LEBs is passed and returned here ++ * @lpt_first: LEB number of first LPT LEB ++ * @lpt_lebs: number of LEBs for LPT is passed and returned here ++ * @big_lpt: use big LPT model is passed and returned here ++ * ++ * This function returns %0 on success and a negative error code on failure. ++ */ ++int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, ++ int *lpt_lebs, int *big_lpt) ++{ ++ int lnum, err = 0, node_sz, iopos, i, j, cnt, len, alen, row; ++ int blnum, boffs, bsz, bcnt; ++ struct ubifs_pnode *pnode = NULL; ++ struct ubifs_nnode *nnode = NULL; ++ void *buf = NULL, *p; ++ struct ubifs_lpt_lprops *ltab = NULL; ++ int *lsave = NULL; ++ ++ err = calc_dflt_lpt_geom(c, main_lebs, big_lpt); ++ if (err) ++ return err; ++ *lpt_lebs = c->lpt_lebs; ++ ++ /* Needed by 'ubifs_pack_nnode()' and 'set_ltab()' */ ++ c->lpt_first = lpt_first; ++ /* Needed by 'set_ltab()' */ ++ c->lpt_last = lpt_first + c->lpt_lebs - 1; ++ /* Needed by 'ubifs_pack_lsave()' */ ++ c->main_first = c->leb_cnt - *main_lebs; ++ ++ lsave = kmalloc(sizeof(int) * c->lsave_cnt, GFP_KERNEL); ++ pnode = kzalloc(sizeof(struct ubifs_pnode), GFP_KERNEL); ++ nnode = kzalloc(sizeof(struct ubifs_nnode), GFP_KERNEL); ++ buf = vmalloc(c->leb_size); ++ ltab = vmalloc(sizeof(struct ubifs_lpt_lprops) * c->lpt_lebs); ++ if (!pnode || !nnode || !buf || !ltab || !lsave) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ ubifs_assert(!c->ltab); ++ c->ltab = ltab; /* Needed by set_ltab */ ++ ++ /* Initialize LPT's own lprops */ ++ for (i = 0; i < c->lpt_lebs; i++) { ++ ltab[i].free = c->leb_size; ++ ltab[i].dirty = 0; ++ ltab[i].tgc = 0; ++ ltab[i].cmt = 0; ++ } ++ ++ lnum = lpt_first; ++ p = buf; ++ /* Number of leaf nodes (pnodes) */ ++ cnt = c->pnode_cnt; ++ ++ /* ++ * The first pnode contains the LEB properties for the LEBs that contain ++ * the root inode node and the root index node of the index tree. 
++ */ ++ node_sz = ALIGN(ubifs_idx_node_sz(c, 1), 8); ++ iopos = ALIGN(node_sz, c->min_io_size); ++ pnode->lprops[0].free = c->leb_size - iopos; ++ pnode->lprops[0].dirty = iopos - node_sz; ++ pnode->lprops[0].flags = LPROPS_INDEX; ++ ++ node_sz = UBIFS_INO_NODE_SZ; ++ iopos = ALIGN(node_sz, c->min_io_size); ++ pnode->lprops[1].free = c->leb_size - iopos; ++ pnode->lprops[1].dirty = iopos - node_sz; ++ ++ for (i = 2; i < UBIFS_LPT_FANOUT; i++) ++ pnode->lprops[i].free = c->leb_size; ++ ++ /* Add first pnode */ ++ ubifs_pack_pnode(c, p, pnode); ++ p += c->pnode_sz; ++ len = c->pnode_sz; ++ pnode->num += 1; ++ ++ /* Reset pnode values for remaining pnodes */ ++ pnode->lprops[0].free = c->leb_size; ++ pnode->lprops[0].dirty = 0; ++ pnode->lprops[0].flags = 0; ++ ++ pnode->lprops[1].free = c->leb_size; ++ pnode->lprops[1].dirty = 0; ++ ++ /* ++ * To calculate the internal node branches, we keep information about ++ * the level below. ++ */ ++ blnum = lnum; /* LEB number of level below */ ++ boffs = 0; /* Offset of level below */ ++ bcnt = cnt; /* Number of nodes in level below */ ++ bsz = c->pnode_sz; /* Size of nodes in level below */ ++ ++ /* Add all remaining pnodes */ ++ for (i = 1; i < cnt; i++) { ++ if (len + c->pnode_sz > c->leb_size) { ++ alen = ALIGN(len, c->min_io_size); ++ set_ltab(c, lnum, c->leb_size - alen, alen - len); ++ memset(p, 0xff, alen - len); ++ err = ubi_leb_change(c->ubi, lnum++, buf, alen, ++ UBI_SHORTTERM); ++ if (err) ++ goto out; ++ p = buf; ++ len = 0; ++ } ++ ubifs_pack_pnode(c, p, pnode); ++ p += c->pnode_sz; ++ len += c->pnode_sz; ++ /* ++ * pnodes are simply numbered left to right starting at zero, ++ * which means the pnode number can be used easily to traverse ++ * down the tree to the corresponding pnode. 
++ */ ++ pnode->num += 1; ++ } ++ ++ row = 0; ++ for (i = UBIFS_LPT_FANOUT; cnt > i; i <<= UBIFS_LPT_FANOUT_SHIFT) ++ row += 1; ++ /* Add all nnodes, one level at a time */ ++ while (1) { ++ /* Number of internal nodes (nnodes) at next level */ ++ cnt = DIV_ROUND_UP(cnt, UBIFS_LPT_FANOUT); ++ for (i = 0; i < cnt; i++) { ++ if (len + c->nnode_sz > c->leb_size) { ++ alen = ALIGN(len, c->min_io_size); ++ set_ltab(c, lnum, c->leb_size - alen, ++ alen - len); ++ memset(p, 0xff, alen - len); ++ err = ubi_leb_change(c->ubi, lnum++, buf, alen, ++ UBI_SHORTTERM); ++ if (err) ++ goto out; ++ p = buf; ++ len = 0; ++ } ++ /* Only 1 nnode at this level, so it is the root */ ++ if (cnt == 1) { ++ c->lpt_lnum = lnum; ++ c->lpt_offs = len; ++ } ++ /* Set branches to the level below */ ++ for (j = 0; j < UBIFS_LPT_FANOUT; j++) { ++ if (bcnt) { ++ if (boffs + bsz > c->leb_size) { ++ blnum += 1; ++ boffs = 0; ++ } ++ nnode->nbranch[j].lnum = blnum; ++ nnode->nbranch[j].offs = boffs; ++ boffs += bsz; ++ bcnt--; ++ } else { ++ nnode->nbranch[j].lnum = 0; ++ nnode->nbranch[j].offs = 0; ++ } ++ } ++ nnode->num = calc_nnode_num(row, i); ++ ubifs_pack_nnode(c, p, nnode); ++ p += c->nnode_sz; ++ len += c->nnode_sz; ++ } ++ /* Only 1 nnode at this level, so it is the root */ ++ if (cnt == 1) ++ break; ++ /* Update the information about the level below */ ++ bcnt = cnt; ++ bsz = c->nnode_sz; ++ row -= 1; ++ } ++ ++ if (*big_lpt) { ++ /* Need to add LPT's save table */ ++ if (len + c->lsave_sz > c->leb_size) { ++ alen = ALIGN(len, c->min_io_size); ++ set_ltab(c, lnum, c->leb_size - alen, alen - len); ++ memset(p, 0xff, alen - len); ++ err = ubi_leb_change(c->ubi, lnum++, buf, alen, ++ UBI_SHORTTERM); ++ if (err) ++ goto out; ++ p = buf; ++ len = 0; ++ } ++ ++ c->lsave_lnum = lnum; ++ c->lsave_offs = len; ++ ++ for (i = 0; i < c->lsave_cnt && i < *main_lebs; i++) ++ lsave[i] = c->main_first + i; ++ for (; i < c->lsave_cnt; i++) ++ lsave[i] = c->main_first; ++ ++ ubifs_pack_lsave(c, p, lsave); 
++ p += c->lsave_sz; ++ len += c->lsave_sz; ++ } ++ ++ /* Need to add LPT's own LEB properties table */ ++ if (len + c->ltab_sz > c->leb_size) { ++ alen = ALIGN(len, c->min_io_size); ++ set_ltab(c, lnum, c->leb_size - alen, alen - len); ++ memset(p, 0xff, alen - len); ++ err = ubi_leb_change(c->ubi, lnum++, buf, alen, UBI_SHORTTERM); ++ if (err) ++ goto out; ++ p = buf; ++ len = 0; ++ } ++ ++ c->ltab_lnum = lnum; ++ c->ltab_offs = len; ++ ++ /* Update ltab before packing it */ ++ len += c->ltab_sz; ++ alen = ALIGN(len, c->min_io_size); ++ set_ltab(c, lnum, c->leb_size - alen, alen - len); ++ ++ ubifs_pack_ltab(c, p, ltab); ++ p += c->ltab_sz; ++ ++ /* Write remaining buffer */ ++ memset(p, 0xff, alen - len); ++ err = ubi_leb_change(c->ubi, lnum, buf, alen, UBI_SHORTTERM); ++ if (err) ++ goto out; ++ ++ c->nhead_lnum = lnum; ++ c->nhead_offs = ALIGN(len, c->min_io_size); ++ ++ dbg_lp("space_bits %d", c->space_bits); ++ dbg_lp("lpt_lnum_bits %d", c->lpt_lnum_bits); ++ dbg_lp("lpt_offs_bits %d", c->lpt_offs_bits); ++ dbg_lp("lpt_spc_bits %d", c->lpt_spc_bits); ++ dbg_lp("pcnt_bits %d", c->pcnt_bits); ++ dbg_lp("lnum_bits %d", c->lnum_bits); ++ dbg_lp("pnode_sz %d", c->pnode_sz); ++ dbg_lp("nnode_sz %d", c->nnode_sz); ++ dbg_lp("ltab_sz %d", c->ltab_sz); ++ dbg_lp("lsave_sz %d", c->lsave_sz); ++ dbg_lp("lsave_cnt %d", c->lsave_cnt); ++ dbg_lp("lpt_hght %d", c->lpt_hght); ++ dbg_lp("big_lpt %d", c->big_lpt); ++ dbg_lp("LPT root is at %d:%d", c->lpt_lnum, c->lpt_offs); ++ dbg_lp("LPT head is at %d:%d", c->nhead_lnum, c->nhead_offs); ++ dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs); ++ if (c->big_lpt) ++ dbg_lp("LPT lsave is at %d:%d", c->lsave_lnum, c->lsave_offs); ++out: ++ c->ltab = NULL; ++ kfree(lsave); ++ vfree(ltab); ++ vfree(buf); ++ kfree(nnode); ++ kfree(pnode); ++ return err; ++} ++ ++/** ++ * update_cats - add LEB properties of a pnode to LEB category lists and heaps. 
++ * @c: UBIFS file-system description object ++ * @pnode: pnode ++ * ++ * When a pnode is loaded into memory, the LEB properties it contains are added, ++ * by this function, to the LEB category lists and heaps. ++ */ ++static void update_cats(struct ubifs_info *c, struct ubifs_pnode *pnode) ++{ ++ int i; ++ ++ for (i = 0; i < UBIFS_LPT_FANOUT; i++) { ++ int cat = pnode->lprops[i].flags & LPROPS_CAT_MASK; ++ int lnum = pnode->lprops[i].lnum; ++ ++ if (!lnum) ++ return; ++ ubifs_add_to_cat(c, &pnode->lprops[i], cat); ++ } ++} ++ ++/** ++ * replace_cats - add LEB properties of a pnode to LEB category lists and heaps. ++ * @c: UBIFS file-system description object ++ * @old_pnode: pnode copied ++ * @new_pnode: pnode copy ++ * ++ * During commit it is sometimes necessary to copy a pnode ++ * (see dirty_cow_pnode). When that happens, references in ++ * category lists and heaps must be replaced. This function does that. ++ */ ++static void replace_cats(struct ubifs_info *c, struct ubifs_pnode *old_pnode, ++ struct ubifs_pnode *new_pnode) ++{ ++ int i; ++ ++ for (i = 0; i < UBIFS_LPT_FANOUT; i++) { ++ if (!new_pnode->lprops[i].lnum) ++ return; ++ ubifs_replace_cat(c, &old_pnode->lprops[i], ++ &new_pnode->lprops[i]); ++ } ++} ++ ++/** ++ * check_lpt_crc - check LPT node crc is correct. ++ * @c: UBIFS file-system description object ++ * @buf: buffer containing node ++ * @len: length of node ++ * ++ * This function returns %0 on success and a negative error code on failure. ++ */ ++static int check_lpt_crc(void *buf, int len) ++{ ++ int pos = 0; ++ uint8_t *addr = buf; ++ uint16_t crc, calc_crc; ++ ++ crc = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_CRC_BITS); ++ calc_crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES, ++ len - UBIFS_LPT_CRC_BYTES); ++ if (crc != calc_crc) { ++ ubifs_err("invalid crc in LPT node: crc %hx calc %hx", crc, ++ calc_crc); ++ dbg_dump_stack(); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++/** ++ * check_lpt_type - check LPT node type is correct. 
++ * @c: UBIFS file-system description object ++ * @addr: address of type bit field is passed and returned updated here ++ * @pos: position of type bit field is passed and returned updated here ++ * @type: expected type ++ * ++ * This function returns %0 on success and a negative error code on failure. ++ */ ++static int check_lpt_type(uint8_t **addr, int *pos, int type) ++{ ++ int node_type; ++ ++ node_type = ubifs_unpack_bits(addr, pos, UBIFS_LPT_TYPE_BITS); ++ if (node_type != type) { ++ ubifs_err("invalid type (%d) in LPT node type %d", node_type, ++ type); ++ dbg_dump_stack(); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++/** ++ * unpack_pnode - unpack a pnode. ++ * @c: UBIFS file-system description object ++ * @buf: buffer containing packed pnode to unpack ++ * @pnode: pnode structure to fill ++ * ++ * This function returns %0 on success and a negative error code on failure. ++ */ ++static int unpack_pnode(const struct ubifs_info *c, void *buf, ++ struct ubifs_pnode *pnode) ++{ ++ uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; ++ int i, pos = 0, err; ++ ++ err = check_lpt_type(&addr, &pos, UBIFS_LPT_PNODE); ++ if (err) ++ return err; ++ if (c->big_lpt) ++ pnode->num = ubifs_unpack_bits(&addr, &pos, c->pcnt_bits); ++ for (i = 0; i < UBIFS_LPT_FANOUT; i++) { ++ struct ubifs_lprops * const lprops = &pnode->lprops[i]; ++ ++ lprops->free = ubifs_unpack_bits(&addr, &pos, c->space_bits); ++ lprops->free <<= 3; ++ lprops->dirty = ubifs_unpack_bits(&addr, &pos, c->space_bits); ++ lprops->dirty <<= 3; ++ ++ if (ubifs_unpack_bits(&addr, &pos, 1)) ++ lprops->flags = LPROPS_INDEX; ++ else ++ lprops->flags = 0; ++ lprops->flags |= ubifs_categorize_lprops(c, lprops); ++ } ++ err = check_lpt_crc(buf, c->pnode_sz); ++ return err; ++} ++ ++/** ++ * ubifs_unpack_nnode - unpack a nnode. 
++ * @c: UBIFS file-system description object ++ * @buf: buffer containing packed nnode to unpack ++ * @nnode: nnode structure to fill ++ * ++ * This function returns %0 on success and a negative error code on failure. ++ */ ++int ubifs_unpack_nnode(const struct ubifs_info *c, void *buf, ++ struct ubifs_nnode *nnode) ++{ ++ uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; ++ int i, pos = 0, err; ++ ++ err = check_lpt_type(&addr, &pos, UBIFS_LPT_NNODE); ++ if (err) ++ return err; ++ if (c->big_lpt) ++ nnode->num = ubifs_unpack_bits(&addr, &pos, c->pcnt_bits); ++ for (i = 0; i < UBIFS_LPT_FANOUT; i++) { ++ int lnum; ++ ++ lnum = ubifs_unpack_bits(&addr, &pos, c->lpt_lnum_bits) + ++ c->lpt_first; ++ if (lnum == c->lpt_last + 1) ++ lnum = 0; ++ nnode->nbranch[i].lnum = lnum; ++ nnode->nbranch[i].offs = ubifs_unpack_bits(&addr, &pos, ++ c->lpt_offs_bits); ++ } ++ err = check_lpt_crc(buf, c->nnode_sz); ++ return err; ++} ++ ++/** ++ * unpack_ltab - unpack the LPT's own lprops table. ++ * @c: UBIFS file-system description object ++ * @buf: buffer from which to unpack ++ * ++ * This function returns %0 on success and a negative error code on failure. ++ */ ++static int unpack_ltab(const struct ubifs_info *c, void *buf) ++{ ++ uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; ++ int i, pos = 0, err; ++ ++ err = check_lpt_type(&addr, &pos, UBIFS_LPT_LTAB); ++ if (err) ++ return err; ++ for (i = 0; i < c->lpt_lebs; i++) { ++ int free = ubifs_unpack_bits(&addr, &pos, c->lpt_spc_bits); ++ int dirty = ubifs_unpack_bits(&addr, &pos, c->lpt_spc_bits); ++ ++ if (free < 0 || free > c->leb_size || dirty < 0 || ++ dirty > c->leb_size || free + dirty > c->leb_size) ++ return -EINVAL; ++ ++ c->ltab[i].free = free; ++ c->ltab[i].dirty = dirty; ++ c->ltab[i].tgc = 0; ++ c->ltab[i].cmt = 0; ++ } ++ err = check_lpt_crc(buf, c->ltab_sz); ++ return err; ++} ++ ++/** ++ * unpack_lsave - unpack the LPT's save table. 
++ * @c: UBIFS file-system description object ++ * @buf: buffer from which to unpack ++ * ++ * This function returns %0 on success and a negative error code on failure. ++ */ ++static int unpack_lsave(const struct ubifs_info *c, void *buf) ++{ ++ uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; ++ int i, pos = 0, err; ++ ++ err = check_lpt_type(&addr, &pos, UBIFS_LPT_LSAVE); ++ if (err) ++ return err; ++ for (i = 0; i < c->lsave_cnt; i++) { ++ int lnum = ubifs_unpack_bits(&addr, &pos, c->lnum_bits); ++ ++ if (lnum < c->main_first || lnum >= c->leb_cnt) ++ return -EINVAL; ++ c->lsave[i] = lnum; ++ } ++ err = check_lpt_crc(buf, c->lsave_sz); ++ return err; ++} ++ ++/** ++ * validate_nnode - validate a nnode. ++ * @c: UBIFS file-system description object ++ * @nnode: nnode to validate ++ * @parent: parent nnode (or NULL for the root nnode) ++ * @iip: index in parent ++ * ++ * This function returns %0 on success and a negative error code on failure. ++ */ ++static int validate_nnode(const struct ubifs_info *c, struct ubifs_nnode *nnode, ++ struct ubifs_nnode *parent, int iip) ++{ ++ int i, lvl, max_offs; ++ ++ if (c->big_lpt) { ++ int num = calc_nnode_num_from_parent(c, parent, iip); ++ ++ if (nnode->num != num) ++ return -EINVAL; ++ } ++ lvl = parent ? parent->level - 1 : c->lpt_hght; ++ if (lvl < 1) ++ return -EINVAL; ++ if (lvl == 1) ++ max_offs = c->leb_size - c->pnode_sz; ++ else ++ max_offs = c->leb_size - c->nnode_sz; ++ for (i = 0; i < UBIFS_LPT_FANOUT; i++) { ++ int lnum = nnode->nbranch[i].lnum; ++ int offs = nnode->nbranch[i].offs; ++ ++ if (lnum == 0) { ++ if (offs != 0) ++ return -EINVAL; ++ continue; ++ } ++ if (lnum < c->lpt_first || lnum > c->lpt_last) ++ return -EINVAL; ++ if (offs < 0 || offs > max_offs) ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++/** ++ * validate_pnode - validate a pnode. 
++ * @c: UBIFS file-system description object ++ * @pnode: pnode to validate ++ * @parent: parent nnode ++ * @iip: index in parent ++ * ++ * This function returns %0 on success and a negative error code on failure. ++ */ ++static int validate_pnode(const struct ubifs_info *c, struct ubifs_pnode *pnode, ++ struct ubifs_nnode *parent, int iip) ++{ ++ int i; ++ ++ if (c->big_lpt) { ++ int num = calc_pnode_num_from_parent(c, parent, iip); ++ ++ if (pnode->num != num) ++ return -EINVAL; ++ } ++ for (i = 0; i < UBIFS_LPT_FANOUT; i++) { ++ int free = pnode->lprops[i].free; ++ int dirty = pnode->lprops[i].dirty; ++ ++ if (free < 0 || free > c->leb_size || free % c->min_io_size || ++ (free & 7)) ++ return -EINVAL; ++ if (dirty < 0 || dirty > c->leb_size || (dirty & 7)) ++ return -EINVAL; ++ if (dirty + free > c->leb_size) ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++/** ++ * set_pnode_lnum - set LEB numbers on a pnode. ++ * @c: UBIFS file-system description object ++ * @pnode: pnode to update ++ * ++ * This function calculates the LEB numbers for the LEB properties it contains ++ * based on the pnode number. ++ */ ++static void set_pnode_lnum(const struct ubifs_info *c, ++ struct ubifs_pnode *pnode) ++{ ++ int i, lnum; ++ ++ lnum = (pnode->num << UBIFS_LPT_FANOUT_SHIFT) + c->main_first; ++ for (i = 0; i < UBIFS_LPT_FANOUT; i++) { ++ if (lnum >= c->leb_cnt) ++ return; ++ pnode->lprops[i].lnum = lnum++; ++ } ++} ++ ++/** ++ * ubifs_read_nnode - read a nnode from flash and link it to the tree in memory. ++ * @c: UBIFS file-system description object ++ * @parent: parent nnode (or NULL for the root) ++ * @iip: index in parent ++ * ++ * This function returns %0 on success and a negative error code on failure. 
++ */ ++int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) ++{ ++ struct ubifs_nbranch *branch = NULL; ++ struct ubifs_nnode *nnode = NULL; ++ void *buf = c->lpt_nod_buf; ++ int err, lnum, offs; ++ ++ if (parent) { ++ branch = &parent->nbranch[iip]; ++ lnum = branch->lnum; ++ offs = branch->offs; ++ } else { ++ lnum = c->lpt_lnum; ++ offs = c->lpt_offs; ++ } ++ nnode = kzalloc(sizeof(struct ubifs_nnode), GFP_NOFS); ++ if (!nnode) { ++ err = -ENOMEM; ++ goto out; ++ } ++ if (lnum == 0) { ++ /* ++ * This nnode was not written which just means that the LEB ++ * properties in the subtree below it describe empty LEBs. We ++ * make the nnode as though we had read it, which in fact means ++ * doing almost nothing. ++ */ ++ if (c->big_lpt) ++ nnode->num = calc_nnode_num_from_parent(c, parent, iip); ++ } else { ++ err = ubi_read(c->ubi, lnum, buf, offs, c->nnode_sz); ++ if (err) ++ goto out; ++ err = ubifs_unpack_nnode(c, buf, nnode); ++ if (err) ++ goto out; ++ } ++ err = validate_nnode(c, nnode, parent, iip); ++ if (err) ++ goto out; ++ if (!c->big_lpt) ++ nnode->num = calc_nnode_num_from_parent(c, parent, iip); ++ if (parent) { ++ branch->nnode = nnode; ++ nnode->level = parent->level - 1; ++ } else { ++ c->nroot = nnode; ++ nnode->level = c->lpt_hght; ++ } ++ nnode->parent = parent; ++ nnode->iip = iip; ++ return 0; ++ ++out: ++ ubifs_err("error %d reading nnode at %d:%d", err, lnum, offs); ++ kfree(nnode); ++ return err; ++} ++ ++/** ++ * read_pnode - read a pnode from flash and link it to the tree in memory. ++ * @c: UBIFS file-system description object ++ * @parent: parent nnode ++ * @iip: index in parent ++ * ++ * This function returns %0 on success and a negative error code on failure. 
++ */ ++static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) ++{ ++ struct ubifs_nbranch *branch; ++ struct ubifs_pnode *pnode = NULL; ++ void *buf = c->lpt_nod_buf; ++ int err, lnum, offs; ++ ++ branch = &parent->nbranch[iip]; ++ lnum = branch->lnum; ++ offs = branch->offs; ++ pnode = kzalloc(sizeof(struct ubifs_pnode), GFP_NOFS); ++ if (!pnode) { ++ err = -ENOMEM; ++ goto out; ++ } ++ if (lnum == 0) { ++ /* ++ * This pnode was not written which just means that the LEB ++ * properties in it describe empty LEBs. We make the pnode as ++ * though we had read it. ++ */ ++ int i; ++ ++ if (c->big_lpt) ++ pnode->num = calc_pnode_num_from_parent(c, parent, iip); ++ for (i = 0; i < UBIFS_LPT_FANOUT; i++) { ++ struct ubifs_lprops * const lprops = &pnode->lprops[i]; ++ ++ lprops->free = c->leb_size; ++ lprops->flags = ubifs_categorize_lprops(c, lprops); ++ } ++ } else { ++ err = ubi_read(c->ubi, lnum, buf, offs, c->pnode_sz); ++ if (err) ++ goto out; ++ err = unpack_pnode(c, buf, pnode); ++ if (err) ++ goto out; ++ } ++ err = validate_pnode(c, pnode, parent, iip); ++ if (err) ++ goto out; ++ if (!c->big_lpt) ++ pnode->num = calc_pnode_num_from_parent(c, parent, iip); ++ branch->pnode = pnode; ++ pnode->parent = parent; ++ pnode->iip = iip; ++ set_pnode_lnum(c, pnode); ++ c->pnodes_have += 1; ++ return 0; ++ ++out: ++ ubifs_err("error %d reading pnode at %d:%d", err, lnum, offs); ++ dbg_dump_pnode(c, pnode, parent, iip); ++ dbg_msg("calc num: %d", calc_pnode_num_from_parent(c, parent, iip)); ++ kfree(pnode); ++ return err; ++} ++ ++/** ++ * read_ltab - read LPT's own lprops table. ++ * @c: UBIFS file-system description object ++ * ++ * This function returns %0 on success and a negative error code on failure. 
++ */ ++static int read_ltab(struct ubifs_info *c) ++{ ++ int err; ++ void *buf; ++ ++ buf = vmalloc(c->ltab_sz); ++ if (!buf) ++ return -ENOMEM; ++ err = ubi_read(c->ubi, c->ltab_lnum, buf, c->ltab_offs, c->ltab_sz); ++ if (err) ++ goto out; ++ err = unpack_ltab(c, buf); ++out: ++ vfree(buf); ++ return err; ++} ++ ++/** ++ * read_lsave - read LPT's save table. ++ * @c: UBIFS file-system description object ++ * ++ * This function returns %0 on success and a negative error code on failure. ++ */ ++static int read_lsave(struct ubifs_info *c) ++{ ++ int err, i; ++ void *buf; ++ ++ buf = vmalloc(c->lsave_sz); ++ if (!buf) ++ return -ENOMEM; ++ err = ubi_read(c->ubi, c->lsave_lnum, buf, c->lsave_offs, c->lsave_sz); ++ if (err) ++ goto out; ++ err = unpack_lsave(c, buf); ++ if (err) ++ goto out; ++ for (i = 0; i < c->lsave_cnt; i++) { ++ int lnum = c->lsave[i]; ++ ++ /* ++ * Due to automatic resizing, the values in the lsave table ++ * could be beyond the volume size - just ignore them. ++ */ ++ if (lnum >= c->leb_cnt) ++ continue; ++ ubifs_lpt_lookup(c, lnum); ++ } ++out: ++ vfree(buf); ++ return err; ++} ++ ++/** ++ * ubifs_get_nnode - get a nnode. ++ * @c: UBIFS file-system description object ++ * @parent: parent nnode (or NULL for the root) ++ * @iip: index in parent ++ * ++ * This function returns a pointer to the nnode on success or a negative error ++ * code on failure. ++ */ ++struct ubifs_nnode *ubifs_get_nnode(struct ubifs_info *c, ++ struct ubifs_nnode *parent, int iip) ++{ ++ struct ubifs_nbranch *branch; ++ struct ubifs_nnode *nnode; ++ int err; ++ ++ branch = &parent->nbranch[iip]; ++ nnode = branch->nnode; ++ if (nnode) ++ return nnode; ++ err = ubifs_read_nnode(c, parent, iip); ++ if (err) ++ return ERR_PTR(err); ++ return branch->nnode; ++} ++ ++/** ++ * ubifs_get_pnode - get a pnode. 
++ * @c: UBIFS file-system description object ++ * @parent: parent nnode ++ * @iip: index in parent ++ * ++ * This function returns a pointer to the pnode on success or a negative error ++ * code on failure. ++ */ ++struct ubifs_pnode *ubifs_get_pnode(struct ubifs_info *c, ++ struct ubifs_nnode *parent, int iip) ++{ ++ struct ubifs_nbranch *branch; ++ struct ubifs_pnode *pnode; ++ int err; ++ ++ branch = &parent->nbranch[iip]; ++ pnode = branch->pnode; ++ if (pnode) ++ return pnode; ++ err = read_pnode(c, parent, iip); ++ if (err) ++ return ERR_PTR(err); ++ update_cats(c, branch->pnode); ++ return branch->pnode; ++} ++ ++/** ++ * ubifs_lpt_lookup - lookup LEB properties in the LPT. ++ * @c: UBIFS file-system description object ++ * @lnum: LEB number to lookup ++ * ++ * This function returns a pointer to the LEB properties on success or a ++ * negative error code on failure. ++ */ ++struct ubifs_lprops *ubifs_lpt_lookup(struct ubifs_info *c, int lnum) ++{ ++ int err, i, h, iip, shft; ++ struct ubifs_nnode *nnode; ++ struct ubifs_pnode *pnode; ++ ++ if (!c->nroot) { ++ err = ubifs_read_nnode(c, NULL, 0); ++ if (err) ++ return ERR_PTR(err); ++ } ++ nnode = c->nroot; ++ i = lnum - c->main_first; ++ shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT; ++ for (h = 1; h < c->lpt_hght; h++) { ++ iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); ++ shft -= UBIFS_LPT_FANOUT_SHIFT; ++ nnode = ubifs_get_nnode(c, nnode, iip); ++ if (IS_ERR(nnode)) ++ return ERR_PTR(PTR_ERR(nnode)); ++ } ++ iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); ++ shft -= UBIFS_LPT_FANOUT_SHIFT; ++ pnode = ubifs_get_pnode(c, nnode, iip); ++ if (IS_ERR(pnode)) ++ return ERR_PTR(PTR_ERR(pnode)); ++ iip = (i & (UBIFS_LPT_FANOUT - 1)); ++ dbg_lp("LEB %d, free %d, dirty %d, flags %d", lnum, ++ pnode->lprops[iip].free, pnode->lprops[iip].dirty, ++ pnode->lprops[iip].flags); ++ return &pnode->lprops[iip]; ++} ++ ++/** ++ * dirty_cow_nnode - ensure a nnode is not being committed. 
++ * @c: UBIFS file-system description object ++ * @nnode: nnode to check ++ * ++ * Returns dirtied nnode on success or negative error code on failure. ++ */ ++static struct ubifs_nnode *dirty_cow_nnode(struct ubifs_info *c, ++ struct ubifs_nnode *nnode) ++{ ++ struct ubifs_nnode *n; ++ int i; ++ ++ if (!test_bit(COW_CNODE, &nnode->flags)) { ++ /* nnode is not being committed */ ++ if (!test_and_set_bit(DIRTY_CNODE, &nnode->flags)) { ++ c->dirty_nn_cnt += 1; ++ ubifs_add_nnode_dirt(c, nnode); ++ } ++ return nnode; ++ } ++ ++ /* nnode is being committed, so copy it */ ++ n = kmalloc(sizeof(struct ubifs_nnode), GFP_NOFS); ++ if (unlikely(!n)) ++ return ERR_PTR(-ENOMEM); ++ ++ memcpy(n, nnode, sizeof(struct ubifs_nnode)); ++ n->cnext = NULL; ++ __set_bit(DIRTY_CNODE, &n->flags); ++ __clear_bit(COW_CNODE, &n->flags); ++ ++ /* The children now have new parent */ ++ for (i = 0; i < UBIFS_LPT_FANOUT; i++) { ++ struct ubifs_nbranch *branch = &n->nbranch[i]; ++ ++ if (branch->cnode) ++ branch->cnode->parent = n; ++ } ++ ++ ubifs_assert(!test_bit(OBSOLETE_CNODE, &nnode->flags)); ++ __set_bit(OBSOLETE_CNODE, &nnode->flags); ++ ++ c->dirty_nn_cnt += 1; ++ ubifs_add_nnode_dirt(c, nnode); ++ if (nnode->parent) ++ nnode->parent->nbranch[n->iip].nnode = n; ++ else ++ c->nroot = n; ++ return n; ++} ++ ++/** ++ * dirty_cow_pnode - ensure a pnode is not being committed. ++ * @c: UBIFS file-system description object ++ * @pnode: pnode to check ++ * ++ * Returns dirtied pnode on success or negative error code on failure. 
++ */ ++static struct ubifs_pnode *dirty_cow_pnode(struct ubifs_info *c, ++ struct ubifs_pnode *pnode) ++{ ++ struct ubifs_pnode *p; ++ ++ if (!test_bit(COW_CNODE, &pnode->flags)) { ++ /* pnode is not being committed */ ++ if (!test_and_set_bit(DIRTY_CNODE, &pnode->flags)) { ++ c->dirty_pn_cnt += 1; ++ add_pnode_dirt(c, pnode); ++ } ++ return pnode; ++ } ++ ++ /* pnode is being committed, so copy it */ ++ p = kmalloc(sizeof(struct ubifs_pnode), GFP_NOFS); ++ if (unlikely(!p)) ++ return ERR_PTR(-ENOMEM); ++ ++ memcpy(p, pnode, sizeof(struct ubifs_pnode)); ++ p->cnext = NULL; ++ __set_bit(DIRTY_CNODE, &p->flags); ++ __clear_bit(COW_CNODE, &p->flags); ++ replace_cats(c, pnode, p); ++ ++ ubifs_assert(!test_bit(OBSOLETE_CNODE, &pnode->flags)); ++ __set_bit(OBSOLETE_CNODE, &pnode->flags); ++ ++ c->dirty_pn_cnt += 1; ++ add_pnode_dirt(c, pnode); ++ pnode->parent->nbranch[p->iip].pnode = p; ++ return p; ++} ++ ++/** ++ * ubifs_lpt_lookup_dirty - lookup LEB properties in the LPT. ++ * @c: UBIFS file-system description object ++ * @lnum: LEB number to lookup ++ * ++ * This function returns a pointer to the LEB properties on success or a ++ * negative error code on failure. 
++ */
++struct ubifs_lprops *ubifs_lpt_lookup_dirty(struct ubifs_info *c, int lnum)
++{
++	int err, i, h, iip, shft;
++	struct ubifs_nnode *nnode;
++	struct ubifs_pnode *pnode;
++
++	/* Read the root nnode first if it is not in memory yet */
++	if (!c->nroot) {
++		err = ubifs_read_nnode(c, NULL, 0);
++		if (err)
++			return ERR_PTR(err);
++	}
++	/* Every node on the path, starting with the root, gets dirtied */
++	nnode = c->nroot;
++	nnode = dirty_cow_nnode(c, nnode);
++	if (IS_ERR(nnode))
++		return ERR_PTR(PTR_ERR(nnode)); /* re-wrap for the return type */
++	i = lnum - c->main_first; /* index of the LEB within the main area */
++	shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT;
++	/* Walk down one nnode level per iteration, consuming index bits */
++	for (h = 1; h < c->lpt_hght; h++) {
++		iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
++		shft -= UBIFS_LPT_FANOUT_SHIFT;
++		nnode = ubifs_get_nnode(c, nnode, iip);
++		if (IS_ERR(nnode))
++			return ERR_PTR(PTR_ERR(nnode));
++		/* dirty_cow_nnode() may hand back a copy; continue from it */
++		nnode = dirty_cow_nnode(c, nnode);
++		if (IS_ERR(nnode))
++			return ERR_PTR(PTR_ERR(nnode));
++	}
++	iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
++	shft -= UBIFS_LPT_FANOUT_SHIFT; /* value is not read after this point */
++	pnode = ubifs_get_pnode(c, nnode, iip);
++	if (IS_ERR(pnode))
++		return ERR_PTR(PTR_ERR(pnode));
++	pnode = dirty_cow_pnode(c, pnode);
++	if (IS_ERR(pnode))
++		return ERR_PTR(PTR_ERR(pnode));
++	/* The low index bits select the lprops slot inside the pnode */
++	iip = (i & (UBIFS_LPT_FANOUT - 1));
++	dbg_lp("LEB %d, free %d, dirty %d, flags %d", lnum,
++	       pnode->lprops[iip].free, pnode->lprops[iip].dirty,
++	       pnode->lprops[iip].flags);
++	ubifs_assert(test_bit(DIRTY_CNODE, &pnode->flags));
++	return &pnode->lprops[iip];
++}
++
++/**
++ * lpt_init_rd - initialize the LPT for reading.
++ * @c: UBIFS file-system description object
++ *
++ * This function returns %0 on success and a negative error code on failure.
++ */
++static int lpt_init_rd(struct ubifs_info *c)
++{
++	int err, i;
++
++	/*
++	 * NOTE(review): the early returns below do not release what was
++	 * allocated before them; presumably the caller frees the partially
++	 * initialized LPT state on failure - confirm against the mount path.
++	 */
++	c->ltab = vmalloc(sizeof(struct ubifs_lpt_lprops) * c->lpt_lebs);
++	if (!c->ltab)
++		return -ENOMEM;
++
++	/* One scratch buffer sized for the larger of the two node kinds */
++	i = max_t(int, c->nnode_sz, c->pnode_sz);
++	c->lpt_nod_buf = kmalloc(i, GFP_KERNEL);
++	if (!c->lpt_nod_buf)
++		return -ENOMEM;
++
++	/* Each category heap starts empty with room for LPT_HEAP_SZ entries */
++	for (i = 0; i < LPROPS_HEAP_CNT; i++) {
++		c->lpt_heap[i].arr = kmalloc(sizeof(void *) * LPT_HEAP_SZ,
++					     GFP_KERNEL);
++		if (!c->lpt_heap[i].arr)
++			return -ENOMEM;
++		c->lpt_heap[i].cnt = 0;
++		c->lpt_heap[i].max_cnt = LPT_HEAP_SZ;
++	}
++
++	c->dirty_idx.arr = kmalloc(sizeof(void *) * LPT_HEAP_SZ, GFP_KERNEL);
++	if (!c->dirty_idx.arr)
++		return -ENOMEM;
++	c->dirty_idx.cnt = 0;
++	c->dirty_idx.max_cnt = LPT_HEAP_SZ;
++
++	err = read_ltab(c);
++	if (err)
++		return err;
++
++	/* Everything below only logs the LPT geometry and positions */
++	dbg_lp("space_bits %d", c->space_bits);
++	dbg_lp("lpt_lnum_bits %d", c->lpt_lnum_bits);
++	dbg_lp("lpt_offs_bits %d", c->lpt_offs_bits);
++	dbg_lp("lpt_spc_bits %d", c->lpt_spc_bits);
++	dbg_lp("pcnt_bits %d", c->pcnt_bits);
++	dbg_lp("lnum_bits %d", c->lnum_bits);
++	dbg_lp("pnode_sz %d", c->pnode_sz);
++	dbg_lp("nnode_sz %d", c->nnode_sz);
++	dbg_lp("ltab_sz %d", c->ltab_sz);
++	dbg_lp("lsave_sz %d", c->lsave_sz);
++	dbg_lp("lsave_cnt %d", c->lsave_cnt);
++	dbg_lp("lpt_hght %d", c->lpt_hght);
++	dbg_lp("big_lpt %d", c->big_lpt);
++	dbg_lp("LPT root is at %d:%d", c->lpt_lnum, c->lpt_offs);
++	dbg_lp("LPT head is at %d:%d", c->nhead_lnum, c->nhead_offs);
++	dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs);
++	if (c->big_lpt)
++		dbg_lp("LPT lsave is at %d:%d", c->lsave_lnum, c->lsave_offs);
++
++	return 0;
++}
++
++/**
++ * lpt_init_wr - initialize the LPT for writing.
++ * @c: UBIFS file-system description object
++ *
++ * 'lpt_init_rd()' must have been called already.
++ *
++ * This function returns %0 on success and a negative error code on failure.
++ */
++static int lpt_init_wr(struct ubifs_info *c)
++{
++	int idx, ret;
++
++	c->ltab_cmt = vmalloc(sizeof(struct ubifs_lpt_lprops) * c->lpt_lebs);
++	if (c->ltab_cmt == NULL)
++		return -ENOMEM;
++
++	c->lpt_buf = vmalloc(c->leb_size);
++	if (c->lpt_buf == NULL)
++		return -ENOMEM;
++
++	/* The LEB save table exists only in the big-LPT layout */
++	if (c->big_lpt) {
++		c->lsave = kmalloc(sizeof(int) * c->lsave_cnt, GFP_NOFS);
++		if (c->lsave == NULL)
++			return -ENOMEM;
++
++		ret = read_lsave(c);
++		if (ret)
++			return ret;
++	}
++
++	/* Unmap every LPT LEB that the ltab records as completely free */
++	for (idx = 0; idx < c->lpt_lebs; idx++) {
++		if (c->ltab[idx].free != c->leb_size)
++			continue;
++
++		ret = ubifs_leb_unmap(c, c->lpt_first + idx);
++		if (ret)
++			return ret;
++	}
++
++	return 0;
++}
++
++/**
++ * ubifs_lpt_init - initialize the LPT.
++ * @c: UBIFS file-system description object
++ * @rd: non-zero to run the read-side initialization
++ * @wr: non-zero to run the write-side initialization
++ *
++ * The read side, when requested, always runs before the write side. A 'rw'
++ * mount passes both @rd and @wr, a 'ro' mount passes only @rd, and a 'ro' to
++ * 'rw' re-mount passes only @wr.
++ *
++ * This function returns %0 on success and the negative error code of the first
++ * failing step otherwise; the write side is skipped if the read side fails.
++ */
++int ubifs_lpt_init(struct ubifs_info *c, int rd, int wr)
++{
++	int ret = 0;
++
++	if (rd)
++		ret = lpt_init_rd(c);
++	if (!ret && wr)
++		ret = lpt_init_wr(c);
++
++	return ret;
++}
++
++/**
++ * struct lpt_scan_node - somewhere to put nodes while we scan LPT.
++ * @nnode: where to keep a nnode ++ * @pnode: where to keep a pnode ++ * @cnode: where to keep a cnode ++ * @in_tree: is the node in the tree in memory ++ * @ptr.nnode: pointer to the nnode (if it is an nnode) which may be here or in ++ * the tree ++ * @ptr.pnode: ditto for pnode ++ * @ptr.cnode: ditto for cnode ++ */ ++struct lpt_scan_node { ++ union { ++ struct ubifs_nnode nnode; ++ struct ubifs_pnode pnode; ++ struct ubifs_cnode cnode; ++ }; ++ int in_tree; ++ union { ++ struct ubifs_nnode *nnode; ++ struct ubifs_pnode *pnode; ++ struct ubifs_cnode *cnode; ++ } ptr; ++}; ++ ++/** ++ * scan_get_nnode - for the scan, get a nnode from either the tree or flash. ++ * @c: the UBIFS file-system description object ++ * @path: where to put the nnode ++ * @parent: parent of the nnode ++ * @iip: index in parent of the nnode ++ * ++ * This function returns a pointer to the nnode on success or a negative error ++ * code on failure. ++ */ ++static struct ubifs_nnode *scan_get_nnode(struct ubifs_info *c, ++ struct lpt_scan_node *path, ++ struct ubifs_nnode *parent, int iip) ++{ ++ struct ubifs_nbranch *branch; ++ struct ubifs_nnode *nnode; ++ void *buf = c->lpt_nod_buf; ++ int err; ++ ++ branch = &parent->nbranch[iip]; ++ nnode = branch->nnode; ++ if (nnode) { ++ path->in_tree = 1; ++ path->ptr.nnode = nnode; ++ return nnode; ++ } ++ nnode = &path->nnode; ++ path->in_tree = 0; ++ path->ptr.nnode = nnode; ++ memset(nnode, 0, sizeof(struct ubifs_nnode)); ++ if (branch->lnum == 0) { ++ /* ++ * This nnode was not written which just means that the LEB ++ * properties in the subtree below it describe empty LEBs. We ++ * make the nnode as though we had read it, which in fact means ++ * doing almost nothing. 
++ */ ++ if (c->big_lpt) ++ nnode->num = calc_nnode_num_from_parent(c, parent, iip); ++ } else { ++ err = ubi_read(c->ubi, branch->lnum, buf, branch->offs, ++ c->nnode_sz); ++ if (err) ++ return ERR_PTR(err); ++ err = ubifs_unpack_nnode(c, buf, nnode); ++ if (err) ++ return ERR_PTR(err); ++ } ++ err = validate_nnode(c, nnode, parent, iip); ++ if (err) ++ return ERR_PTR(err); ++ if (!c->big_lpt) ++ nnode->num = calc_nnode_num_from_parent(c, parent, iip); ++ nnode->level = parent->level - 1; ++ nnode->parent = parent; ++ nnode->iip = iip; ++ return nnode; ++} ++ ++/** ++ * scan_get_pnode - for the scan, get a pnode from either the tree or flash. ++ * @c: the UBIFS file-system description object ++ * @path: where to put the pnode ++ * @parent: parent of the pnode ++ * @iip: index in parent of the pnode ++ * ++ * This function returns a pointer to the pnode on success or a negative error ++ * code on failure. ++ */ ++static struct ubifs_pnode *scan_get_pnode(struct ubifs_info *c, ++ struct lpt_scan_node *path, ++ struct ubifs_nnode *parent, int iip) ++{ ++ struct ubifs_nbranch *branch; ++ struct ubifs_pnode *pnode; ++ void *buf = c->lpt_nod_buf; ++ int err; ++ ++ branch = &parent->nbranch[iip]; ++ pnode = branch->pnode; ++ if (pnode) { ++ path->in_tree = 1; ++ path->ptr.pnode = pnode; ++ return pnode; ++ } ++ pnode = &path->pnode; ++ path->in_tree = 0; ++ path->ptr.pnode = pnode; ++ memset(pnode, 0, sizeof(struct ubifs_pnode)); ++ if (branch->lnum == 0) { ++ /* ++ * This pnode was not written which just means that the LEB ++ * properties in it describe empty LEBs. We make the pnode as ++ * though we had read it. 
++ */ ++ int i; ++ ++ if (c->big_lpt) ++ pnode->num = calc_pnode_num_from_parent(c, parent, iip); ++ for (i = 0; i < UBIFS_LPT_FANOUT; i++) { ++ struct ubifs_lprops * const lprops = &pnode->lprops[i]; ++ ++ lprops->free = c->leb_size; ++ lprops->flags = ubifs_categorize_lprops(c, lprops); ++ } ++ } else { ++ ubifs_assert(branch->lnum >= c->lpt_first && ++ branch->lnum <= c->lpt_last); ++ ubifs_assert(branch->offs >= 0 && branch->offs < c->leb_size); ++ err = ubi_read(c->ubi, branch->lnum, buf, branch->offs, ++ c->pnode_sz); ++ if (err) ++ return ERR_PTR(err); ++ err = unpack_pnode(c, buf, pnode); ++ if (err) ++ return ERR_PTR(err); ++ } ++ err = validate_pnode(c, pnode, parent, iip); ++ if (err) ++ return ERR_PTR(err); ++ if (!c->big_lpt) ++ pnode->num = calc_pnode_num_from_parent(c, parent, iip); ++ pnode->parent = parent; ++ pnode->iip = iip; ++ set_pnode_lnum(c, pnode); ++ return pnode; ++} ++ ++/** ++ * ubifs_lpt_scan_nolock - scan the LPT. ++ * @c: the UBIFS file-system description object ++ * @start_lnum: LEB number from which to start scanning ++ * @end_lnum: LEB number at which to stop scanning ++ * @scan_cb: callback function called for each lprops ++ * @data: data to be passed to the callback function ++ * ++ * This function returns %0 on success and a negative error code on failure. 
++ */ ++int ubifs_lpt_scan_nolock(struct ubifs_info *c, int start_lnum, int end_lnum, ++ ubifs_lpt_scan_callback scan_cb, void *data) ++{ ++ int err = 0, i, h, iip, shft; ++ struct ubifs_nnode *nnode; ++ struct ubifs_pnode *pnode; ++ struct lpt_scan_node *path; ++ ++ if (start_lnum == -1) { ++ start_lnum = end_lnum + 1; ++ if (start_lnum >= c->leb_cnt) ++ start_lnum = c->main_first; ++ } ++ ++ ubifs_assert(start_lnum >= c->main_first && start_lnum < c->leb_cnt); ++ ubifs_assert(end_lnum >= c->main_first && end_lnum < c->leb_cnt); ++ ++ if (!c->nroot) { ++ err = ubifs_read_nnode(c, NULL, 0); ++ if (err) ++ return err; ++ } ++ ++ path = kmalloc(sizeof(struct lpt_scan_node) * (c->lpt_hght + 1), ++ GFP_NOFS); ++ if (!path) ++ return -ENOMEM; ++ ++ path[0].ptr.nnode = c->nroot; ++ path[0].in_tree = 1; ++again: ++ /* Descend to the pnode containing start_lnum */ ++ nnode = c->nroot; ++ i = start_lnum - c->main_first; ++ shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT; ++ for (h = 1; h < c->lpt_hght; h++) { ++ iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); ++ shft -= UBIFS_LPT_FANOUT_SHIFT; ++ nnode = scan_get_nnode(c, path + h, nnode, iip); ++ if (IS_ERR(nnode)) { ++ err = PTR_ERR(nnode); ++ goto out; ++ } ++ } ++ iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); ++ shft -= UBIFS_LPT_FANOUT_SHIFT; ++ pnode = scan_get_pnode(c, path + h, nnode, iip); ++ if (IS_ERR(pnode)) { ++ err = PTR_ERR(pnode); ++ goto out; ++ } ++ iip = (i & (UBIFS_LPT_FANOUT - 1)); ++ ++ /* Loop for each lprops */ ++ while (1) { ++ struct ubifs_lprops *lprops = &pnode->lprops[iip]; ++ int ret, lnum = lprops->lnum; ++ ++ ret = scan_cb(c, lprops, path[h].in_tree, data); ++ if (ret < 0) { ++ err = ret; ++ goto out; ++ } ++ if (ret & LPT_SCAN_ADD) { ++ /* Add all the nodes in path to the tree in memory */ ++ for (h = 1; h < c->lpt_hght; h++) { ++ const size_t sz = sizeof(struct ubifs_nnode); ++ struct ubifs_nnode *parent; ++ ++ if (path[h].in_tree) ++ continue; ++ nnode = kmalloc(sz, GFP_NOFS); ++ if (!nnode) { 
++ err = -ENOMEM; ++ goto out; ++ } ++ memcpy(nnode, &path[h].nnode, sz); ++ parent = nnode->parent; ++ parent->nbranch[nnode->iip].nnode = nnode; ++ path[h].ptr.nnode = nnode; ++ path[h].in_tree = 1; ++ path[h + 1].cnode.parent = nnode; ++ } ++ if (path[h].in_tree) ++ ubifs_ensure_cat(c, lprops); ++ else { ++ const size_t sz = sizeof(struct ubifs_pnode); ++ struct ubifs_nnode *parent; ++ ++ pnode = kmalloc(sz, GFP_NOFS); ++ if (!pnode) { ++ err = -ENOMEM; ++ goto out; ++ } ++ memcpy(pnode, &path[h].pnode, sz); ++ parent = pnode->parent; ++ parent->nbranch[pnode->iip].pnode = pnode; ++ path[h].ptr.pnode = pnode; ++ path[h].in_tree = 1; ++ update_cats(c, pnode); ++ c->pnodes_have += 1; ++ } ++ err = dbg_check_lpt_nodes(c, (struct ubifs_cnode *) ++ c->nroot, 0, 0); ++ if (err) ++ goto out; ++ err = dbg_check_cats(c); ++ if (err) ++ goto out; ++ } ++ if (ret & LPT_SCAN_STOP) { ++ err = 0; ++ break; ++ } ++ /* Get the next lprops */ ++ if (lnum == end_lnum) { ++ /* ++ * We got to the end without finding what we were ++ * looking for ++ */ ++ err = -ENOSPC; ++ goto out; ++ } ++ if (lnum + 1 >= c->leb_cnt) { ++ /* Wrap-around to the beginning */ ++ start_lnum = c->main_first; ++ goto again; ++ } ++ if (iip + 1 < UBIFS_LPT_FANOUT) { ++ /* Next lprops is in the same pnode */ ++ iip += 1; ++ continue; ++ } ++ /* We need to get the next pnode. 
Go up until we can go right */ ++ iip = pnode->iip; ++ while (1) { ++ h -= 1; ++ ubifs_assert(h >= 0); ++ nnode = path[h].ptr.nnode; ++ if (iip + 1 < UBIFS_LPT_FANOUT) ++ break; ++ iip = nnode->iip; ++ } ++ /* Go right */ ++ iip += 1; ++ /* Descend to the pnode */ ++ h += 1; ++ for (; h < c->lpt_hght; h++) { ++ nnode = scan_get_nnode(c, path + h, nnode, iip); ++ if (IS_ERR(nnode)) { ++ err = PTR_ERR(nnode); ++ goto out; ++ } ++ iip = 0; ++ } ++ pnode = scan_get_pnode(c, path + h, nnode, iip); ++ if (IS_ERR(pnode)) { ++ err = PTR_ERR(pnode); ++ goto out; ++ } ++ iip = 0; ++ } ++out: ++ kfree(path); ++ return err; ++} ++ ++#ifdef CONFIG_UBIFS_FS_DEBUG ++ ++/** ++ * dbg_chk_pnode - check a pnode. ++ * @c: the UBIFS file-system description object ++ * @pnode: pnode to check ++ * @col: pnode column ++ * ++ * This function returns %0 on success and a negative error code on failure. ++ */ ++static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, ++ int col) ++{ ++ int i; ++ ++ if (pnode->num != col) { ++ dbg_err("pnode num %d expected %d parent num %d iip %d", ++ pnode->num, col, pnode->parent->num, pnode->iip); ++ return -EINVAL; ++ } ++ for (i = 0; i < UBIFS_LPT_FANOUT; i++) { ++ struct ubifs_lprops *lp, *lprops = &pnode->lprops[i]; ++ int lnum = (pnode->num << UBIFS_LPT_FANOUT_SHIFT) + i + ++ c->main_first; ++ int found, cat = lprops->flags & LPROPS_CAT_MASK; ++ struct ubifs_lpt_heap *heap; ++ struct list_head *list = NULL; ++ ++ if (lnum >= c->leb_cnt) ++ continue; ++ if (lprops->lnum != lnum) { ++ dbg_err("bad LEB number %d expected %d", ++ lprops->lnum, lnum); ++ return -EINVAL; ++ } ++ if (lprops->flags & LPROPS_TAKEN) { ++ if (cat != LPROPS_UNCAT) { ++ dbg_err("LEB %d taken but not uncat %d", ++ lprops->lnum, cat); ++ return -EINVAL; ++ } ++ continue; ++ } ++ if (lprops->flags & LPROPS_INDEX) { ++ switch (cat) { ++ case LPROPS_UNCAT: ++ case LPROPS_DIRTY_IDX: ++ case LPROPS_FRDI_IDX: ++ break; ++ default: ++ dbg_err("LEB %d index but cat %d", ++ 
lprops->lnum, cat); ++ return -EINVAL; ++ } ++ } else { ++ switch (cat) { ++ case LPROPS_UNCAT: ++ case LPROPS_DIRTY: ++ case LPROPS_FREE: ++ case LPROPS_EMPTY: ++ case LPROPS_FREEABLE: ++ break; ++ default: ++ dbg_err("LEB %d not index but cat %d", ++ lprops->lnum, cat); ++ return -EINVAL; ++ } ++ } ++ switch (cat) { ++ case LPROPS_UNCAT: ++ list = &c->uncat_list; ++ break; ++ case LPROPS_EMPTY: ++ list = &c->empty_list; ++ break; ++ case LPROPS_FREEABLE: ++ list = &c->freeable_list; ++ break; ++ case LPROPS_FRDI_IDX: ++ list = &c->frdi_idx_list; ++ break; ++ } ++ found = 0; ++ switch (cat) { ++ case LPROPS_DIRTY: ++ case LPROPS_DIRTY_IDX: ++ case LPROPS_FREE: ++ heap = &c->lpt_heap[cat - 1]; ++ if (lprops->hpos < heap->cnt && ++ heap->arr[lprops->hpos] == lprops) ++ found = 1; ++ break; ++ case LPROPS_UNCAT: ++ case LPROPS_EMPTY: ++ case LPROPS_FREEABLE: ++ case LPROPS_FRDI_IDX: ++ list_for_each_entry(lp, list, list) ++ if (lprops == lp) { ++ found = 1; ++ break; ++ } ++ break; ++ } ++ if (!found) { ++ dbg_err("LEB %d cat %d not found in cat heap/list", ++ lprops->lnum, cat); ++ return -EINVAL; ++ } ++ switch (cat) { ++ case LPROPS_EMPTY: ++ if (lprops->free != c->leb_size) { ++ dbg_err("LEB %d cat %d free %d dirty %d", ++ lprops->lnum, cat, lprops->free, ++ lprops->dirty); ++ return -EINVAL; ++ } ++ case LPROPS_FREEABLE: ++ case LPROPS_FRDI_IDX: ++ if (lprops->free + lprops->dirty != c->leb_size) { ++ dbg_err("LEB %d cat %d free %d dirty %d", ++ lprops->lnum, cat, lprops->free, ++ lprops->dirty); ++ return -EINVAL; ++ } ++ } ++ } ++ return 0; ++} ++ ++/** ++ * dbg_check_lpt_nodes - check nnodes and pnodes. ++ * @c: the UBIFS file-system description object ++ * @cnode: next cnode (nnode or pnode) to check ++ * @row: row of cnode (root is zero) ++ * @col: column of cnode (leftmost is zero) ++ * ++ * This function returns %0 on success and a negative error code on failure. 
++ */ ++int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode, ++ int row, int col) ++{ ++ struct ubifs_nnode *nnode, *nn; ++ struct ubifs_cnode *cn; ++ int num, iip = 0, err; ++ ++ if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) ++ return 0; ++ ++ while (cnode) { ++ ubifs_assert(row >= 0); ++ nnode = cnode->parent; ++ if (cnode->level) { ++ /* cnode is a nnode */ ++ num = calc_nnode_num(row, col); ++ if (cnode->num != num) { ++ dbg_err("nnode num %d expected %d " ++ "parent num %d iip %d", cnode->num, num, ++ (nnode ? nnode->num : 0), cnode->iip); ++ return -EINVAL; ++ } ++ nn = (struct ubifs_nnode *)cnode; ++ while (iip < UBIFS_LPT_FANOUT) { ++ cn = nn->nbranch[iip].cnode; ++ if (cn) { ++ /* Go down */ ++ row += 1; ++ col <<= UBIFS_LPT_FANOUT_SHIFT; ++ col += iip; ++ iip = 0; ++ cnode = cn; ++ break; ++ } ++ /* Go right */ ++ iip += 1; ++ } ++ if (iip < UBIFS_LPT_FANOUT) ++ continue; ++ } else { ++ struct ubifs_pnode *pnode; ++ ++ /* cnode is a pnode */ ++ pnode = (struct ubifs_pnode *)cnode; ++ err = dbg_chk_pnode(c, pnode, col); ++ if (err) ++ return err; ++ } ++ /* Go up and to the right */ ++ row -= 1; ++ col >>= UBIFS_LPT_FANOUT_SHIFT; ++ iip = cnode->iip + 1; ++ cnode = (struct ubifs_cnode *)nnode; ++ } ++ return 0; ++} ++ ++#endif /* CONFIG_UBIFS_FS_DEBUG */ +diff -Nurd linux-2.6.24/fs/ubifs/lpt_commit.c ubifs-v2.6.24/fs/ubifs/lpt_commit.c +--- linux-2.6.24/fs/ubifs/lpt_commit.c 1970-01-01 02:00:00.000000000 +0200 ++++ ubifs-v2.6.24/fs/ubifs/lpt_commit.c 2009-04-07 17:14:47.000000000 +0200 +@@ -0,0 +1,1973 @@ ++/* ++ * This file is part of UBIFS. ++ * ++ * Copyright (C) 2006-2008 Nokia Corporation. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published by ++ * the Free Software Foundation. 
++ *
++ * This program is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
++ * more details.
++ *
++ * You should have received a copy of the GNU General Public License along with
++ * this program; if not, write to the Free Software Foundation, Inc., 51
++ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ *
++ * Authors: Adrian Hunter
++ *          Artem Bityutskiy (Битюцкий Артём)
++ */
++
++/*
++ * This file implements commit-related functionality of the LEB properties
++ * subsystem.
++ */
++
++#include <linux/crc16.h>
++#include "ubifs.h"
++
++/**
++ * first_dirty_cnode - find first dirty cnode.
++ * @nnode: nnode at which to start (must not be %NULL)
++ *
++ * This function repeatedly descends into the first dirty child, in branch
++ * order. It returns the first dirty pnode reached that way, or the deepest
++ * nnode on that path that has no dirty child. If @nnode itself has no dirty
++ * child, @nnode is returned, so the result is never %NULL.
++ */
++static struct ubifs_cnode *first_dirty_cnode(struct ubifs_nnode *nnode)
++{
++	ubifs_assert(nnode);
++	while (1) {
++		int i, cont = 0; /* cont: a dirty nnode child was found */
++
++		for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
++			struct ubifs_cnode *cnode;
++
++			cnode = nnode->nbranch[i].cnode;
++			if (cnode &&
++			    test_bit(DIRTY_CNODE, &cnode->flags)) {
++				if (cnode->level == 0)
++					return cnode; /* level 0 is a pnode */
++				/* Dirty nnode: restart the scan one level down */
++				nnode = (struct ubifs_nnode *)cnode;
++				cont = 1;
++				break;
++			}
++		}
++		if (!cont)
++			return (struct ubifs_cnode *)nnode;
++	}
++}
++
++/**
++ * next_dirty_cnode - find next dirty cnode.
++ * @cnode: cnode from which to begin searching
++ *
++ * This function returns the next dirty cnode or %NULL if there is not one.
++ */
++static struct ubifs_cnode *next_dirty_cnode(struct ubifs_cnode *cnode)
++{
++	struct ubifs_nnode *nnode;
++	int i;
++
++	ubifs_assert(cnode);
++	nnode = cnode->parent;
++	if (!nnode)
++		return NULL; /* no parent: nothing follows this cnode */
++	/* Look at the siblings to the right of @cnode */
++	for (i = cnode->iip + 1; i < UBIFS_LPT_FANOUT; i++) {
++		cnode = nnode->nbranch[i].cnode;
++		if (cnode && test_bit(DIRTY_CNODE, &cnode->flags)) {
++			if (cnode->level == 0)
++				return cnode; /* cnode is a pnode */
++			/* cnode is a nnode */
++			return first_dirty_cnode((struct ubifs_nnode *)cnode);
++		}
++	}
++	/* No dirty sibling to the right: the parent itself comes next */
++	return (struct ubifs_cnode *)nnode;
++}
++
++/**
++ * get_cnodes_to_commit - create list of dirty cnodes to commit.
++ * @c: UBIFS file-system description object
++ *
++ * This function returns the number of cnodes to commit.
++ */
++static int get_cnodes_to_commit(struct ubifs_info *c)
++{
++	struct ubifs_cnode *cnode, *cnext;
++	int cnt = 0;
++
++	if (!c->nroot)
++		return 0;
++
++	/* A clean root means there is nothing to commit */
++	if (!test_bit(DIRTY_CNODE, &c->nroot->flags))
++		return 0;
++
++	c->lpt_cnext = first_dirty_cnode(c->nroot);
++	cnode = c->lpt_cnext;
++	if (!cnode)
++		return 0;
++	cnt += 1;
++	while (1) {
++		/*
++		 * NOTE(review): COW_ZNODE is set here, but dirty_cow_nnode()
++		 * and dirty_cow_pnode() test COW_CNODE. The two agree only if
++		 * the enum values are equal - confirm in ubifs.h.
++		 */
++		ubifs_assert(!test_bit(COW_ZNODE, &cnode->flags));
++		__set_bit(COW_ZNODE, &cnode->flags);
++		cnext = next_dirty_cnode(cnode);
++		if (!cnext) {
++			/* Close the list: the last cnode links back to the first */
++			cnode->cnext = c->lpt_cnext;
++			break;
++		}
++		cnode->cnext = cnext;
++		cnode = cnext;
++		cnt += 1;
++	}
++	dbg_cmt("committing %d cnodes", cnt);
++	dbg_lp("committing %d cnodes", cnt);
++	ubifs_assert(cnt == c->dirty_nn_cnt + c->dirty_pn_cnt);
++	return cnt;
++}
++
++/**
++ * upd_ltab - update LPT LEB properties.
++ * @c: UBIFS file-system description object ++ * @lnum: LEB number ++ * @free: amount of free space ++ * @dirty: amount of dirty space to add ++ */ ++static void upd_ltab(struct ubifs_info *c, int lnum, int free, int dirty) ++{ ++ dbg_lp("LEB %d free %d dirty %d to %d +%d", ++ lnum, c->ltab[lnum - c->lpt_first].free, ++ c->ltab[lnum - c->lpt_first].dirty, free, dirty); ++ ubifs_assert(lnum >= c->lpt_first && lnum <= c->lpt_last); ++ c->ltab[lnum - c->lpt_first].free = free; ++ c->ltab[lnum - c->lpt_first].dirty += dirty; ++} ++ ++/** ++ * alloc_lpt_leb - allocate an LPT LEB that is empty. ++ * @c: UBIFS file-system description object ++ * @lnum: LEB number is passed and returned here ++ * ++ * This function finds the next empty LEB in the ltab starting from @lnum. If a ++ * an empty LEB is found it is returned in @lnum and the function returns %0. ++ * Otherwise the function returns -ENOSPC. Note however, that LPT is designed ++ * never to run out of space. ++ */ ++static int alloc_lpt_leb(struct ubifs_info *c, int *lnum) ++{ ++ int i, n; ++ ++ n = *lnum - c->lpt_first + 1; ++ for (i = n; i < c->lpt_lebs; i++) { ++ if (c->ltab[i].tgc || c->ltab[i].cmt) ++ continue; ++ if (c->ltab[i].free == c->leb_size) { ++ c->ltab[i].cmt = 1; ++ *lnum = i + c->lpt_first; ++ return 0; ++ } ++ } ++ ++ for (i = 0; i < n; i++) { ++ if (c->ltab[i].tgc || c->ltab[i].cmt) ++ continue; ++ if (c->ltab[i].free == c->leb_size) { ++ c->ltab[i].cmt = 1; ++ *lnum = i + c->lpt_first; ++ return 0; ++ } ++ } ++ return -ENOSPC; ++} ++ ++/** ++ * layout_cnodes - layout cnodes for commit. ++ * @c: UBIFS file-system description object ++ * ++ * This function returns %0 on success and a negative error code on failure. 
++ */ ++static int layout_cnodes(struct ubifs_info *c) ++{ ++ int lnum, offs, len, alen, done_lsave, done_ltab, err; ++ struct ubifs_cnode *cnode; ++ ++ err = dbg_chk_lpt_sz(c, 0, 0); ++ if (err) ++ return err; ++ cnode = c->lpt_cnext; ++ if (!cnode) ++ return 0; ++ lnum = c->nhead_lnum; ++ offs = c->nhead_offs; ++ /* Try to place lsave and ltab nicely */ ++ done_lsave = !c->big_lpt; ++ done_ltab = 0; ++ if (!done_lsave && offs + c->lsave_sz <= c->leb_size) { ++ done_lsave = 1; ++ c->lsave_lnum = lnum; ++ c->lsave_offs = offs; ++ offs += c->lsave_sz; ++ dbg_chk_lpt_sz(c, 1, c->lsave_sz); ++ } ++ ++ if (offs + c->ltab_sz <= c->leb_size) { ++ done_ltab = 1; ++ c->ltab_lnum = lnum; ++ c->ltab_offs = offs; ++ offs += c->ltab_sz; ++ dbg_chk_lpt_sz(c, 1, c->ltab_sz); ++ } ++ ++ do { ++ if (cnode->level) { ++ len = c->nnode_sz; ++ c->dirty_nn_cnt -= 1; ++ } else { ++ len = c->pnode_sz; ++ c->dirty_pn_cnt -= 1; ++ } ++ while (offs + len > c->leb_size) { ++ alen = ALIGN(offs, c->min_io_size); ++ upd_ltab(c, lnum, c->leb_size - alen, alen - offs); ++ dbg_chk_lpt_sz(c, 2, c->leb_size - offs); ++ err = alloc_lpt_leb(c, &lnum); ++ if (err) ++ goto no_space; ++ offs = 0; ++ ubifs_assert(lnum >= c->lpt_first && ++ lnum <= c->lpt_last); ++ /* Try to place lsave and ltab nicely */ ++ if (!done_lsave) { ++ done_lsave = 1; ++ c->lsave_lnum = lnum; ++ c->lsave_offs = offs; ++ offs += c->lsave_sz; ++ dbg_chk_lpt_sz(c, 1, c->lsave_sz); ++ continue; ++ } ++ if (!done_ltab) { ++ done_ltab = 1; ++ c->ltab_lnum = lnum; ++ c->ltab_offs = offs; ++ offs += c->ltab_sz; ++ dbg_chk_lpt_sz(c, 1, c->ltab_sz); ++ continue; ++ } ++ break; ++ } ++ if (cnode->parent) { ++ cnode->parent->nbranch[cnode->iip].lnum = lnum; ++ cnode->parent->nbranch[cnode->iip].offs = offs; ++ } else { ++ c->lpt_lnum = lnum; ++ c->lpt_offs = offs; ++ } ++ offs += len; ++ dbg_chk_lpt_sz(c, 1, len); ++ cnode = cnode->cnext; ++ } while (cnode && cnode != c->lpt_cnext); ++ ++ /* Make sure to place LPT's save table */ ++ if 
(!done_lsave) { ++ if (offs + c->lsave_sz > c->leb_size) { ++ alen = ALIGN(offs, c->min_io_size); ++ upd_ltab(c, lnum, c->leb_size - alen, alen - offs); ++ dbg_chk_lpt_sz(c, 2, c->leb_size - offs); ++ err = alloc_lpt_leb(c, &lnum); ++ if (err) ++ goto no_space; ++ offs = 0; ++ ubifs_assert(lnum >= c->lpt_first && ++ lnum <= c->lpt_last); ++ } ++ done_lsave = 1; ++ c->lsave_lnum = lnum; ++ c->lsave_offs = offs; ++ offs += c->lsave_sz; ++ dbg_chk_lpt_sz(c, 1, c->lsave_sz); ++ } ++ ++ /* Make sure to place LPT's own lprops table */ ++ if (!done_ltab) { ++ if (offs + c->ltab_sz > c->leb_size) { ++ alen = ALIGN(offs, c->min_io_size); ++ upd_ltab(c, lnum, c->leb_size - alen, alen - offs); ++ dbg_chk_lpt_sz(c, 2, c->leb_size - offs); ++ err = alloc_lpt_leb(c, &lnum); ++ if (err) ++ goto no_space; ++ offs = 0; ++ ubifs_assert(lnum >= c->lpt_first && ++ lnum <= c->lpt_last); ++ } ++ done_ltab = 1; ++ c->ltab_lnum = lnum; ++ c->ltab_offs = offs; ++ offs += c->ltab_sz; ++ dbg_chk_lpt_sz(c, 1, c->ltab_sz); ++ } ++ ++ alen = ALIGN(offs, c->min_io_size); ++ upd_ltab(c, lnum, c->leb_size - alen, alen - offs); ++ dbg_chk_lpt_sz(c, 4, alen - offs); ++ err = dbg_chk_lpt_sz(c, 3, alen); ++ if (err) ++ return err; ++ return 0; ++ ++no_space: ++ ubifs_err("LPT out of space"); ++ dbg_err("LPT out of space at LEB %d:%d needing %d, done_ltab %d, " ++ "done_lsave %d", lnum, offs, len, done_ltab, done_lsave); ++ dbg_dump_lpt_info(c); ++ dbg_dump_lpt_lebs(c); ++ dump_stack(); ++ return err; ++} ++ ++/** ++ * realloc_lpt_leb - allocate an LPT LEB that is empty. ++ * @c: UBIFS file-system description object ++ * @lnum: LEB number is passed and returned here ++ * ++ * This function duplicates exactly the results of the function alloc_lpt_leb. ++ * It is used during end commit to reallocate the same LEB numbers that were ++ * allocated by alloc_lpt_leb during start commit. ++ * ++ * This function finds the next LEB that was allocated by the alloc_lpt_leb ++ * function starting from @lnum. 
If a LEB is found it is returned in @lnum and ++ * the function returns %0. Otherwise the function returns -ENOSPC. ++ * Note however, that LPT is designed never to run out of space. ++ */ ++static int realloc_lpt_leb(struct ubifs_info *c, int *lnum) ++{ ++ int i, n; ++ ++ n = *lnum - c->lpt_first + 1; ++ for (i = n; i < c->lpt_lebs; i++) ++ if (c->ltab[i].cmt) { ++ c->ltab[i].cmt = 0; ++ *lnum = i + c->lpt_first; ++ return 0; ++ } ++ ++ for (i = 0; i < n; i++) ++ if (c->ltab[i].cmt) { ++ c->ltab[i].cmt = 0; ++ *lnum = i + c->lpt_first; ++ return 0; ++ } ++ return -ENOSPC; ++} ++ ++/** ++ * write_cnodes - write cnodes for commit. ++ * @c: UBIFS file-system description object ++ * ++ * This function returns %0 on success and a negative error code on failure. ++ */ ++static int write_cnodes(struct ubifs_info *c) ++{ ++ int lnum, offs, len, from, err, wlen, alen, done_ltab, done_lsave; ++ struct ubifs_cnode *cnode; ++ void *buf = c->lpt_buf; ++ ++ cnode = c->lpt_cnext; ++ if (!cnode) ++ return 0; ++ lnum = c->nhead_lnum; ++ offs = c->nhead_offs; ++ from = offs; ++ /* Ensure empty LEB is unmapped */ ++ if (offs == 0) { ++ err = ubifs_leb_unmap(c, lnum); ++ if (err) ++ return err; ++ } ++ /* Try to place lsave and ltab nicely */ ++ done_lsave = !c->big_lpt; ++ done_ltab = 0; ++ if (!done_lsave && offs + c->lsave_sz <= c->leb_size) { ++ done_lsave = 1; ++ ubifs_pack_lsave(c, buf + offs, c->lsave); ++ offs += c->lsave_sz; ++ dbg_chk_lpt_sz(c, 1, c->lsave_sz); ++ } ++ ++ if (offs + c->ltab_sz <= c->leb_size) { ++ done_ltab = 1; ++ ubifs_pack_ltab(c, buf + offs, c->ltab_cmt); ++ offs += c->ltab_sz; ++ dbg_chk_lpt_sz(c, 1, c->ltab_sz); ++ } ++ ++ /* Loop for each cnode */ ++ do { ++ if (cnode->level) ++ len = c->nnode_sz; ++ else ++ len = c->pnode_sz; ++ while (offs + len > c->leb_size) { ++ wlen = offs - from; ++ if (wlen) { ++ alen = ALIGN(wlen, c->min_io_size); ++ memset(buf + offs, 0xff, alen - wlen); ++ err = ubifs_leb_write(c, lnum, buf + from, from, ++ alen, 
UBI_SHORTTERM); ++ if (err) ++ return err; ++ } ++ dbg_chk_lpt_sz(c, 2, c->leb_size - offs); ++ err = realloc_lpt_leb(c, &lnum); ++ if (err) ++ goto no_space; ++ offs = from = 0; ++ ubifs_assert(lnum >= c->lpt_first && ++ lnum <= c->lpt_last); ++ err = ubifs_leb_unmap(c, lnum); ++ if (err) ++ return err; ++ /* Try to place lsave and ltab nicely */ ++ if (!done_lsave) { ++ done_lsave = 1; ++ ubifs_pack_lsave(c, buf + offs, c->lsave); ++ offs += c->lsave_sz; ++ dbg_chk_lpt_sz(c, 1, c->lsave_sz); ++ continue; ++ } ++ if (!done_ltab) { ++ done_ltab = 1; ++ ubifs_pack_ltab(c, buf + offs, c->ltab_cmt); ++ offs += c->ltab_sz; ++ dbg_chk_lpt_sz(c, 1, c->ltab_sz); ++ continue; ++ } ++ break; ++ } ++ if (cnode->level) ++ ubifs_pack_nnode(c, buf + offs, ++ (struct ubifs_nnode *)cnode); ++ else ++ ubifs_pack_pnode(c, buf + offs, ++ (struct ubifs_pnode *)cnode); ++ /* ++ * The reason for the barriers is the same as in case of TNC. ++ * See comment in 'write_index()'. 'dirty_cow_nnode()' and ++ * 'dirty_cow_pnode()' are the functions for which this is ++ * important. 
++ */ ++ clear_bit(DIRTY_CNODE, &cnode->flags); ++ smp_mb__before_clear_bit(); ++ clear_bit(COW_ZNODE, &cnode->flags); ++ smp_mb__after_clear_bit(); ++ offs += len; ++ dbg_chk_lpt_sz(c, 1, len); ++ cnode = cnode->cnext; ++ } while (cnode && cnode != c->lpt_cnext); ++ ++ /* Make sure to place LPT's save table */ ++ if (!done_lsave) { ++ if (offs + c->lsave_sz > c->leb_size) { ++ wlen = offs - from; ++ alen = ALIGN(wlen, c->min_io_size); ++ memset(buf + offs, 0xff, alen - wlen); ++ err = ubifs_leb_write(c, lnum, buf + from, from, alen, ++ UBI_SHORTTERM); ++ if (err) ++ return err; ++ dbg_chk_lpt_sz(c, 2, c->leb_size - offs); ++ err = realloc_lpt_leb(c, &lnum); ++ if (err) ++ goto no_space; ++ offs = from = 0; ++ ubifs_assert(lnum >= c->lpt_first && ++ lnum <= c->lpt_last); ++ err = ubifs_leb_unmap(c, lnum); ++ if (err) ++ return err; ++ } ++ done_lsave = 1; ++ ubifs_pack_lsave(c, buf + offs, c->lsave); ++ offs += c->lsave_sz; ++ dbg_chk_lpt_sz(c, 1, c->lsave_sz); ++ } ++ ++ /* Make sure to place LPT's own lprops table */ ++ if (!done_ltab) { ++ if (offs + c->ltab_sz > c->leb_size) { ++ wlen = offs - from; ++ alen = ALIGN(wlen, c->min_io_size); ++ memset(buf + offs, 0xff, alen - wlen); ++ err = ubifs_leb_write(c, lnum, buf + from, from, alen, ++ UBI_SHORTTERM); ++ if (err) ++ return err; ++ dbg_chk_lpt_sz(c, 2, c->leb_size - offs); ++ err = realloc_lpt_leb(c, &lnum); ++ if (err) ++ goto no_space; ++ offs = from = 0; ++ ubifs_assert(lnum >= c->lpt_first && ++ lnum <= c->lpt_last); ++ err = ubifs_leb_unmap(c, lnum); ++ if (err) ++ return err; ++ } ++ done_ltab = 1; ++ ubifs_pack_ltab(c, buf + offs, c->ltab_cmt); ++ offs += c->ltab_sz; ++ dbg_chk_lpt_sz(c, 1, c->ltab_sz); ++ } ++ ++ /* Write remaining data in buffer */ ++ wlen = offs - from; ++ alen = ALIGN(wlen, c->min_io_size); ++ memset(buf + offs, 0xff, alen - wlen); ++ err = ubifs_leb_write(c, lnum, buf + from, from, alen, UBI_SHORTTERM); ++ if (err) ++ return err; ++ ++ dbg_chk_lpt_sz(c, 4, alen - wlen); ++ err = 
dbg_chk_lpt_sz(c, 3, ALIGN(offs, c->min_io_size)); ++ if (err) ++ return err; ++ ++ c->nhead_lnum = lnum; ++ c->nhead_offs = ALIGN(offs, c->min_io_size); ++ ++ dbg_lp("LPT root is at %d:%d", c->lpt_lnum, c->lpt_offs); ++ dbg_lp("LPT head is at %d:%d", c->nhead_lnum, c->nhead_offs); ++ dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs); ++ if (c->big_lpt) ++ dbg_lp("LPT lsave is at %d:%d", c->lsave_lnum, c->lsave_offs); ++ ++ return 0; ++ ++no_space: ++ ubifs_err("LPT out of space mismatch"); ++ dbg_err("LPT out of space mismatch at LEB %d:%d needing %d, done_ltab " ++ "%d, done_lsave %d", lnum, offs, len, done_ltab, done_lsave); ++ dbg_dump_lpt_info(c); ++ dbg_dump_lpt_lebs(c); ++ dump_stack(); ++ return err; ++} ++ ++/** ++ * next_pnode_to_dirty - find next pnode to dirty. ++ * @c: UBIFS file-system description object ++ * @pnode: pnode ++ * ++ * This function returns the next pnode to dirty or %NULL if there are no more ++ * pnodes. Note that pnodes that have never been written (lnum == 0) are ++ * skipped. 
++ */
++static struct ubifs_pnode *next_pnode_to_dirty(struct ubifs_info *c,
++					       struct ubifs_pnode *pnode)
++{
++	struct ubifs_nnode *nnode;
++	int iip;
++
++	/*
++	 * Try to go right: look for a later sibling under the same parent
++	 * whose branch has a non-zero LEB number.
++	 */
++	nnode = pnode->parent;
++	for (iip = pnode->iip + 1; iip < UBIFS_LPT_FANOUT; iip++) {
++		if (nnode->nbranch[iip].lnum)
++			return ubifs_get_pnode(c, nnode, iip);
++	}
++
++	/*
++	 * Go up while can't go right: climb one level at a time and resume
++	 * scanning just after the slot we came from. Running out of parents
++	 * means there is nothing left to visit, so %NULL is returned.
++	 */
++	do {
++		iip = nnode->iip + 1;
++		nnode = nnode->parent;
++		if (!nnode)
++			return NULL;
++		for (; iip < UBIFS_LPT_FANOUT; iip++) {
++			if (nnode->nbranch[iip].lnum)
++				break;
++		}
++	} while (iip >= UBIFS_LPT_FANOUT);
++
++	/*
++	 * Go right: step into the branch found above. An error pointer is
++	 * handed back unchanged (only its type is cast), so callers have to
++	 * test the result with IS_ERR().
++	 */
++	nnode = ubifs_get_nnode(c, nnode, iip);
++	if (IS_ERR(nnode))
++		return (void *)nnode;
++
++	/*
++	 * Go down to level 1, always taking the first branch that has a
++	 * non-zero LEB number.
++	 */
++	while (nnode->level > 1) {
++		for (iip = 0; iip < UBIFS_LPT_FANOUT; iip++) {
++			if (nnode->nbranch[iip].lnum)
++				break;
++		}
++		if (iip >= UBIFS_LPT_FANOUT) {
++			/*
++			 * Should not happen, but we need to keep going
++			 * if it does.
++			 */
++			iip = 0;
++		}
++		nnode = ubifs_get_nnode(c, nnode, iip);
++		if (IS_ERR(nnode))
++			return (void *)nnode;
++	}
++
++	/* Pick the first pnode branch with a non-zero LEB number */
++	for (iip = 0; iip < UBIFS_LPT_FANOUT; iip++)
++		if (nnode->nbranch[iip].lnum)
++			break;
++	if (iip >= UBIFS_LPT_FANOUT)
++		/* Should not happen, but we need to keep going if it does */
++		iip = 0;
++	return ubifs_get_pnode(c, nnode, iip);
++}
++
++/**
++ * pnode_lookup - lookup a pnode in the LPT.
++ * @c: UBIFS file-system description object
++ * @i: pnode number (0 to DIV_ROUND_UP(main_lebs, UBIFS_LPT_FANOUT) - 1)
++ *
++ * This function returns a pointer to the pnode on success or a negative
++ * error code (encoded as an error pointer, test with IS_ERR()) on failure.
++ */ ++static struct ubifs_pnode *pnode_lookup(struct ubifs_info *c, int i) ++{ ++ int err, h, iip, shft; ++ struct ubifs_nnode *nnode; ++ ++ if (!c->nroot) { ++ err = ubifs_read_nnode(c, NULL, 0); ++ if (err) ++ return ERR_PTR(err); ++ } ++ i <<= UBIFS_LPT_FANOUT_SHIFT; ++ nnode = c->nroot; ++ shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT; ++ for (h = 1; h < c->lpt_hght; h++) { ++ iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); ++ shft -= UBIFS_LPT_FANOUT_SHIFT; ++ nnode = ubifs_get_nnode(c, nnode, iip); ++ if (IS_ERR(nnode)) ++ return ERR_PTR(PTR_ERR(nnode)); ++ } ++ iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); ++ return ubifs_get_pnode(c, nnode, iip); ++} ++ ++/** ++ * add_pnode_dirt - add dirty space to LPT LEB properties. ++ * @c: UBIFS file-system description object ++ * @pnode: pnode for which to add dirt ++ */ ++static void add_pnode_dirt(struct ubifs_info *c, struct ubifs_pnode *pnode) ++{ ++ ubifs_add_lpt_dirt(c, pnode->parent->nbranch[pnode->iip].lnum, ++ c->pnode_sz); ++} ++ ++/** ++ * do_make_pnode_dirty - mark a pnode dirty. ++ * @c: UBIFS file-system description object ++ * @pnode: pnode to mark dirty ++ */ ++static void do_make_pnode_dirty(struct ubifs_info *c, struct ubifs_pnode *pnode) ++{ ++ /* Assumes cnext list is empty i.e. not called during commit */ ++ if (!test_and_set_bit(DIRTY_CNODE, &pnode->flags)) { ++ struct ubifs_nnode *nnode; ++ ++ c->dirty_pn_cnt += 1; ++ add_pnode_dirt(c, pnode); ++ /* Mark parent and ancestors dirty too */ ++ nnode = pnode->parent; ++ while (nnode) { ++ if (!test_and_set_bit(DIRTY_CNODE, &nnode->flags)) { ++ c->dirty_nn_cnt += 1; ++ ubifs_add_nnode_dirt(c, nnode); ++ nnode = nnode->parent; ++ } else ++ break; ++ } ++ } ++} ++ ++/** ++ * make_tree_dirty - mark the entire LEB properties tree dirty. ++ * @c: UBIFS file-system description object ++ * ++ * This function is used by the "small" LPT model to cause the entire LEB ++ * properties tree to be written. 
The "small" LPT model does not use LPT
++ * garbage collection because it is more efficient to write the entire tree
++ * (because it is small).
++ *
++ * This function returns %0 on success and a negative error code on failure.
++ */
++static int make_tree_dirty(struct ubifs_info *c)
++{
++	struct ubifs_pnode *pnode;
++
++	/*
++	 * 'pnode_lookup()' reports failure with an error pointer, which is
++	 * non-NULL and therefore must be rejected before the loop below
++	 * dereferences it.
++	 */
++	pnode = pnode_lookup(c, 0);
++	if (IS_ERR(pnode))
++		return PTR_ERR(pnode);
++
++	while (pnode) {
++		do_make_pnode_dirty(c, pnode);
++		pnode = next_pnode_to_dirty(c, pnode);
++		if (IS_ERR(pnode))
++			return PTR_ERR(pnode);
++	}
++	return 0;
++}
++
++/**
++ * need_write_all - determine if the LPT area is running out of free space.
++ * @c: UBIFS file-system description object
++ *
++ * The usable space is the remainder of the LEB holding the LPT head plus
++ * every other LPT LEB that is either completely free or contains nothing but
++ * free and dirty space.
++ *
++ * This function returns %1 if the LPT area is running out of free space and %0
++ * if it is not.
++ */
++static int need_write_all(struct ubifs_info *c)
++{
++	long long free = 0;
++	int i;
++
++	for (i = 0; i < c->lpt_lebs; i++) {
++		if (i + c->lpt_first == c->nhead_lnum)
++			/* Only the part after the head offset counts */
++			free += c->leb_size - c->nhead_offs;
++		else if (c->ltab[i].free == c->leb_size)
++			free += c->leb_size;
++		else if (c->ltab[i].free + c->ltab[i].dirty == c->leb_size)
++			/* Only free and dirty space, counted as a whole LEB */
++			free += c->leb_size;
++	}
++	/* Less than twice the size left */
++	if (free <= c->lpt_sz * 2)
++		return 1;
++	return 0;
++}
++
++/**
++ * lpt_tgc_start - start trivial garbage collection of LPT LEBs.
++ * @c: UBIFS file-system description object
++ *
++ * LPT trivial garbage collection is where a LPT LEB contains only dirty and
++ * free space and so may be reused as soon as the next commit is completed.
++ * This function is called during start commit to mark LPT LEBs for trivial GC.
++ */
++static void lpt_tgc_start(struct ubifs_info *c)
++{
++	int idx;
++
++	for (idx = 0; idx < c->lpt_lebs; idx++) {
++		/* The LEB holding the LPT head is never a candidate */
++		if (idx + c->lpt_first == c->nhead_lnum)
++			continue;
++		if (c->ltab[idx].dirty <= 0)
++			continue;
++		if (c->ltab[idx].free + c->ltab[idx].dirty != c->leb_size)
++			continue;
++
++		/* Only free and dirty space: account it as entirely free */
++		c->ltab[idx].tgc = 1;
++		c->ltab[idx].free = c->leb_size;
++		c->ltab[idx].dirty = 0;
++		dbg_lp("LEB %d", idx + c->lpt_first);
++	}
++}
++
++/**
++ * lpt_tgc_end - finish trivial garbage collection of LPT LEBs.
++ * @c: UBIFS file-system description object
++ *
++ * Every LPT LEB that 'lpt_tgc_start()' flagged for trivial GC is un-mapped
++ * and its flag is cleared. This is called once the commit has completed.
++ *
++ * Returns %0 on success, or the error code of the first failed un-map, in
++ * which case the remaining LEBs are left flagged.
++ */
++static int lpt_tgc_end(struct ubifs_info *c)
++{
++	int idx;
++
++	for (idx = 0; idx < c->lpt_lebs; idx++) {
++		int err;
++
++		if (!c->ltab[idx].tgc)
++			continue;
++		err = ubifs_leb_unmap(c, idx + c->lpt_first);
++		if (err)
++			return err;
++		c->ltab[idx].tgc = 0;
++		dbg_lp("LEB %d", idx + c->lpt_first);
++	}
++	return 0;
++}
++
++/**
++ * populate_lsave - fill the lsave array with important LEB numbers.
++ * @c: the UBIFS file-system description object
++ *
++ * This function is only called for the "big" model. It records a small number
++ * of LEB numbers of important LEBs. Important LEBs are ones that are (from
++ * most important to least important): empty, freeable, freeable index, dirty
++ * index, dirty or free. Upon mount, we read this list of LEB numbers and bring
++ * their pnodes into memory. That will stop us from having to scan the LPT
++ * straight away. For the "small" model we assume that scanning the LPT is no
++ * big deal. 
++ */
++static void populate_lsave(struct ubifs_info *c)
++{
++	/* Sources are visited from most to least important */
++	struct list_head *lists[] = {
++		&c->empty_list, &c->freeable_list, &c->frdi_idx_list,
++	};
++	int cats[] = { LPROPS_DIRTY_IDX, LPROPS_DIRTY, LPROPS_FREE };
++	struct ubifs_lprops *lprops;
++	struct ubifs_lpt_heap *heap;
++	int k, i, cnt = 0;
++
++	ubifs_assert(c->big_lpt);
++	if (!(c->lpt_drty_flgs & LSAVE_DIRTY)) {
++		c->lpt_drty_flgs |= LSAVE_DIRTY;
++		ubifs_add_lpt_dirt(c, c->lsave_lnum, c->lsave_sz);
++	}
++
++	/* First the category lists ... */
++	for (k = 0; k < (int)(sizeof(lists) / sizeof(lists[0])); k++) {
++		list_for_each_entry(lprops, lists[k], list) {
++			c->lsave[cnt++] = lprops->lnum;
++			if (cnt >= c->lsave_cnt)
++				return;
++		}
++	}
++
++	/* ... then the category heaps */
++	for (k = 0; k < (int)(sizeof(cats) / sizeof(cats[0])); k++) {
++		heap = &c->lpt_heap[cats[k] - 1];
++		for (i = 0; i < heap->cnt; i++) {
++			c->lsave[cnt++] = heap->arr[i]->lnum;
++			if (cnt >= c->lsave_cnt)
++				return;
++		}
++	}
++
++	/* Fill it up completely */
++	for (; cnt < c->lsave_cnt; cnt++)
++		c->lsave[cnt] = c->main_first;
++}
++
++/**
++ * nnode_lookup - lookup a nnode in the LPT.
++ * @c: UBIFS file-system description object
++ * @i: nnode number
++ *
++ * This function returns a pointer to the nnode on success or a negative
++ * error code on failure. 
++ */ ++static struct ubifs_nnode *nnode_lookup(struct ubifs_info *c, int i) ++{ ++ int err, iip; ++ struct ubifs_nnode *nnode; ++ ++ if (!c->nroot) { ++ err = ubifs_read_nnode(c, NULL, 0); ++ if (err) ++ return ERR_PTR(err); ++ } ++ nnode = c->nroot; ++ while (1) { ++ iip = i & (UBIFS_LPT_FANOUT - 1); ++ i >>= UBIFS_LPT_FANOUT_SHIFT; ++ if (!i) ++ break; ++ nnode = ubifs_get_nnode(c, nnode, iip); ++ if (IS_ERR(nnode)) ++ return nnode; ++ } ++ return nnode; ++} ++ ++/** ++ * make_nnode_dirty - find a nnode and, if found, make it dirty. ++ * @c: UBIFS file-system description object ++ * @node_num: nnode number of nnode to make dirty ++ * @lnum: LEB number where nnode was written ++ * @offs: offset where nnode was written ++ * ++ * This function is used by LPT garbage collection. LPT garbage collection is ++ * used only for the "big" LPT model (c->big_lpt == 1). Garbage collection ++ * simply involves marking all the nodes in the LEB being garbage-collected as ++ * dirty. The dirty nodes are written next commit, after which the LEB is free ++ * to be reused. ++ * ++ * This function returns %0 on success and a negative error code on failure. ++ */ ++static int make_nnode_dirty(struct ubifs_info *c, int node_num, int lnum, ++ int offs) ++{ ++ struct ubifs_nnode *nnode; ++ ++ nnode = nnode_lookup(c, node_num); ++ if (IS_ERR(nnode)) ++ return PTR_ERR(nnode); ++ if (nnode->parent) { ++ struct ubifs_nbranch *branch; ++ ++ branch = &nnode->parent->nbranch[nnode->iip]; ++ if (branch->lnum != lnum || branch->offs != offs) ++ return 0; /* nnode is obsolete */ ++ } else if (c->lpt_lnum != lnum || c->lpt_offs != offs) ++ return 0; /* nnode is obsolete */ ++ /* Assumes cnext list is empty i.e. 
not called during commit */ ++ if (!test_and_set_bit(DIRTY_CNODE, &nnode->flags)) { ++ c->dirty_nn_cnt += 1; ++ ubifs_add_nnode_dirt(c, nnode); ++ /* Mark parent and ancestors dirty too */ ++ nnode = nnode->parent; ++ while (nnode) { ++ if (!test_and_set_bit(DIRTY_CNODE, &nnode->flags)) { ++ c->dirty_nn_cnt += 1; ++ ubifs_add_nnode_dirt(c, nnode); ++ nnode = nnode->parent; ++ } else ++ break; ++ } ++ } ++ return 0; ++} ++ ++/** ++ * make_pnode_dirty - find a pnode and, if found, make it dirty. ++ * @c: UBIFS file-system description object ++ * @node_num: pnode number of pnode to make dirty ++ * @lnum: LEB number where pnode was written ++ * @offs: offset where pnode was written ++ * ++ * This function is used by LPT garbage collection. LPT garbage collection is ++ * used only for the "big" LPT model (c->big_lpt == 1). Garbage collection ++ * simply involves marking all the nodes in the LEB being garbage-collected as ++ * dirty. The dirty nodes are written next commit, after which the LEB is free ++ * to be reused. ++ * ++ * This function returns %0 on success and a negative error code on failure. ++ */ ++static int make_pnode_dirty(struct ubifs_info *c, int node_num, int lnum, ++ int offs) ++{ ++ struct ubifs_pnode *pnode; ++ struct ubifs_nbranch *branch; ++ ++ pnode = pnode_lookup(c, node_num); ++ if (IS_ERR(pnode)) ++ return PTR_ERR(pnode); ++ branch = &pnode->parent->nbranch[pnode->iip]; ++ if (branch->lnum != lnum || branch->offs != offs) ++ return 0; ++ do_make_pnode_dirty(c, pnode); ++ return 0; ++} ++ ++/** ++ * make_ltab_dirty - make ltab node dirty. ++ * @c: UBIFS file-system description object ++ * @lnum: LEB number where ltab was written ++ * @offs: offset where ltab was written ++ * ++ * This function is used by LPT garbage collection. LPT garbage collection is ++ * used only for the "big" LPT model (c->big_lpt == 1). Garbage collection ++ * simply involves marking all the nodes in the LEB being garbage-collected as ++ * dirty. 
The dirty nodes are written next commit, after which the LEB is free ++ * to be reused. ++ * ++ * This function returns %0 on success and a negative error code on failure. ++ */ ++static int make_ltab_dirty(struct ubifs_info *c, int lnum, int offs) ++{ ++ if (lnum != c->ltab_lnum || offs != c->ltab_offs) ++ return 0; /* This ltab node is obsolete */ ++ if (!(c->lpt_drty_flgs & LTAB_DIRTY)) { ++ c->lpt_drty_flgs |= LTAB_DIRTY; ++ ubifs_add_lpt_dirt(c, c->ltab_lnum, c->ltab_sz); ++ } ++ return 0; ++} ++ ++/** ++ * make_lsave_dirty - make lsave node dirty. ++ * @c: UBIFS file-system description object ++ * @lnum: LEB number where lsave was written ++ * @offs: offset where lsave was written ++ * ++ * This function is used by LPT garbage collection. LPT garbage collection is ++ * used only for the "big" LPT model (c->big_lpt == 1). Garbage collection ++ * simply involves marking all the nodes in the LEB being garbage-collected as ++ * dirty. The dirty nodes are written next commit, after which the LEB is free ++ * to be reused. ++ * ++ * This function returns %0 on success and a negative error code on failure. ++ */ ++static int make_lsave_dirty(struct ubifs_info *c, int lnum, int offs) ++{ ++ if (lnum != c->lsave_lnum || offs != c->lsave_offs) ++ return 0; /* This lsave node is obsolete */ ++ if (!(c->lpt_drty_flgs & LSAVE_DIRTY)) { ++ c->lpt_drty_flgs |= LSAVE_DIRTY; ++ ubifs_add_lpt_dirt(c, c->lsave_lnum, c->lsave_sz); ++ } ++ return 0; ++} ++ ++/** ++ * make_node_dirty - make node dirty. ++ * @c: UBIFS file-system description object ++ * @node_type: LPT node type ++ * @node_num: node number ++ * @lnum: LEB number where node was written ++ * @offs: offset where node was written ++ * ++ * This function is used by LPT garbage collection. LPT garbage collection is ++ * used only for the "big" LPT model (c->big_lpt == 1). Garbage collection ++ * simply involves marking all the nodes in the LEB being garbage-collected as ++ * dirty. 
The dirty nodes are written next commit, after which the LEB is free ++ * to be reused. ++ * ++ * This function returns %0 on success and a negative error code on failure. ++ */ ++static int make_node_dirty(struct ubifs_info *c, int node_type, int node_num, ++ int lnum, int offs) ++{ ++ switch (node_type) { ++ case UBIFS_LPT_NNODE: ++ return make_nnode_dirty(c, node_num, lnum, offs); ++ case UBIFS_LPT_PNODE: ++ return make_pnode_dirty(c, node_num, lnum, offs); ++ case UBIFS_LPT_LTAB: ++ return make_ltab_dirty(c, lnum, offs); ++ case UBIFS_LPT_LSAVE: ++ return make_lsave_dirty(c, lnum, offs); ++ } ++ return -EINVAL; ++} ++ ++/** ++ * get_lpt_node_len - return the length of a node based on its type. ++ * @c: UBIFS file-system description object ++ * @node_type: LPT node type ++ */ ++static int get_lpt_node_len(const struct ubifs_info *c, int node_type) ++{ ++ switch (node_type) { ++ case UBIFS_LPT_NNODE: ++ return c->nnode_sz; ++ case UBIFS_LPT_PNODE: ++ return c->pnode_sz; ++ case UBIFS_LPT_LTAB: ++ return c->ltab_sz; ++ case UBIFS_LPT_LSAVE: ++ return c->lsave_sz; ++ } ++ return 0; ++} ++ ++/** ++ * get_pad_len - return the length of padding in a buffer. ++ * @c: UBIFS file-system description object ++ * @buf: buffer ++ * @len: length of buffer ++ */ ++static int get_pad_len(const struct ubifs_info *c, uint8_t *buf, int len) ++{ ++ int offs, pad_len; ++ ++ if (c->min_io_size == 1) ++ return 0; ++ offs = c->leb_size - len; ++ pad_len = ALIGN(offs, c->min_io_size) - offs; ++ return pad_len; ++} ++ ++/** ++ * get_lpt_node_type - return type (and node number) of a node in a buffer. 
++ * @c: UBIFS file-system description object ++ * @buf: buffer ++ * @node_num: node number is returned here ++ */ ++static int get_lpt_node_type(const struct ubifs_info *c, uint8_t *buf, ++ int *node_num) ++{ ++ uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; ++ int pos = 0, node_type; ++ ++ node_type = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_TYPE_BITS); ++ *node_num = ubifs_unpack_bits(&addr, &pos, c->pcnt_bits); ++ return node_type; ++} ++ ++/** ++ * is_a_node - determine if a buffer contains a node. ++ * @c: UBIFS file-system description object ++ * @buf: buffer ++ * @len: length of buffer ++ * ++ * This function returns %1 if the buffer contains a node or %0 if it does not. ++ */ ++static int is_a_node(const struct ubifs_info *c, uint8_t *buf, int len) ++{ ++ uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; ++ int pos = 0, node_type, node_len; ++ uint16_t crc, calc_crc; ++ ++ if (len < UBIFS_LPT_CRC_BYTES + (UBIFS_LPT_TYPE_BITS + 7) / 8) ++ return 0; ++ node_type = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_TYPE_BITS); ++ if (node_type == UBIFS_LPT_NOT_A_NODE) ++ return 0; ++ node_len = get_lpt_node_len(c, node_type); ++ if (!node_len || node_len > len) ++ return 0; ++ pos = 0; ++ addr = buf; ++ crc = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_CRC_BITS); ++ calc_crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES, ++ node_len - UBIFS_LPT_CRC_BYTES); ++ if (crc != calc_crc) ++ return 0; ++ return 1; ++} ++ ++/** ++ * lpt_gc_lnum - garbage collect a LPT LEB. ++ * @c: UBIFS file-system description object ++ * @lnum: LEB number to garbage collect ++ * ++ * LPT garbage collection is used only for the "big" LPT model ++ * (c->big_lpt == 1). Garbage collection simply involves marking all the nodes ++ * in the LEB being garbage-collected as dirty. The dirty nodes are written ++ * next commit, after which the LEB is free to be reused. ++ * ++ * This function returns %0 on success and a negative error code on failure. 
++ */ ++static int lpt_gc_lnum(struct ubifs_info *c, int lnum) ++{ ++ int err, len = c->leb_size, node_type, node_num, node_len, offs; ++ void *buf = c->lpt_buf; ++ ++ dbg_lp("LEB %d", lnum); ++ err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); ++ if (err) { ++ ubifs_err("cannot read LEB %d, error %d", lnum, err); ++ return err; ++ } ++ while (1) { ++ if (!is_a_node(c, buf, len)) { ++ int pad_len; ++ ++ pad_len = get_pad_len(c, buf, len); ++ if (pad_len) { ++ buf += pad_len; ++ len -= pad_len; ++ continue; ++ } ++ return 0; ++ } ++ node_type = get_lpt_node_type(c, buf, &node_num); ++ node_len = get_lpt_node_len(c, node_type); ++ offs = c->leb_size - len; ++ ubifs_assert(node_len != 0); ++ mutex_lock(&c->lp_mutex); ++ err = make_node_dirty(c, node_type, node_num, lnum, offs); ++ mutex_unlock(&c->lp_mutex); ++ if (err) ++ return err; ++ buf += node_len; ++ len -= node_len; ++ } ++ return 0; ++} ++ ++/** ++ * lpt_gc - LPT garbage collection. ++ * @c: UBIFS file-system description object ++ * ++ * Select a LPT LEB for LPT garbage collection and call 'lpt_gc_lnum()'. ++ * Returns %0 on success and a negative error code on failure. ++ */ ++static int lpt_gc(struct ubifs_info *c) ++{ ++ int i, lnum = -1, dirty = 0; ++ ++ mutex_lock(&c->lp_mutex); ++ for (i = 0; i < c->lpt_lebs; i++) { ++ ubifs_assert(!c->ltab[i].tgc); ++ if (i + c->lpt_first == c->nhead_lnum || ++ c->ltab[i].free + c->ltab[i].dirty == c->leb_size) ++ continue; ++ if (c->ltab[i].dirty > dirty) { ++ dirty = c->ltab[i].dirty; ++ lnum = i + c->lpt_first; ++ } ++ } ++ mutex_unlock(&c->lp_mutex); ++ if (lnum == -1) ++ return -ENOSPC; ++ return lpt_gc_lnum(c, lnum); ++} ++ ++/** ++ * ubifs_lpt_start_commit - UBIFS commit starts. ++ * @c: the UBIFS file-system description object ++ * ++ * This function has to be called when UBIFS starts the commit operation. ++ * This function "freezes" all currently dirty LEB properties and does not ++ * change them anymore. 
Further changes are saved and tracked separately
++ * because they are not part of this commit. This function returns zero in case
++ * of success and a negative error code in case of failure.
++ */
++int ubifs_lpt_start_commit(struct ubifs_info *c)
++{
++	int err, cnt;
++
++	dbg_lp("");
++
++	mutex_lock(&c->lp_mutex);
++	/* Debugging self-checks; both run with lp_mutex held */
++	err = dbg_chk_lpt_free_spc(c);
++	if (err)
++		goto out;
++	err = dbg_check_ltab(c);
++	if (err)
++		goto out;
++
++	if (c->check_lpt_free) {
++		/*
++		 * We ensure there is enough free space in
++		 * ubifs_lpt_post_commit() by marking nodes dirty. That
++		 * information is lost when we unmount, so we also need
++		 * to check free space once after mounting also.
++		 */
++		c->check_lpt_free = 0;
++		while (need_write_all(c)) {
++			/*
++			 * 'lpt_gc()' takes lp_mutex itself, so it has to be
++			 * dropped around the call. On failure the mutex is
++			 * not held, hence the direct return instead of
++			 * jumping to 'out'.
++			 */
++			mutex_unlock(&c->lp_mutex);
++			err = lpt_gc(c);
++			if (err)
++				return err;
++			mutex_lock(&c->lp_mutex);
++		}
++	}
++
++	lpt_tgc_start(c);
++
++	/* No dirty pnodes means there is nothing to lay out */
++	if (!c->dirty_pn_cnt) {
++		dbg_cmt("no cnodes to commit");
++		err = 0;
++		goto out;
++	}
++
++	if (!c->big_lpt && need_write_all(c)) {
++		/* If needed, write everything */
++		err = make_tree_dirty(c);
++		if (err)
++			goto out;
++		/* Run trivial GC marking again now that the tree is dirty */
++		lpt_tgc_start(c);
++	}
++
++	if (c->big_lpt)
++		populate_lsave(c);
++
++	cnt = get_cnodes_to_commit(c);
++	ubifs_assert(cnt != 0);
++
++	err = layout_cnodes(c);
++	if (err)
++		goto out;
++
++	/* Copy the LPT's own lprops for end commit to write */
++	memcpy(c->ltab_cmt, c->ltab,
++	       sizeof(struct ubifs_lpt_lprops) * c->lpt_lebs);
++	c->lpt_drty_flgs &= ~(LTAB_DIRTY | LSAVE_DIRTY);
++
++out:
++	mutex_unlock(&c->lp_mutex);
++	return err;
++}
++
++/**
++ * free_obsolete_cnodes - free obsolete cnodes for commit end.
++ * @c: UBIFS file-system description object
++ */
++static void free_obsolete_cnodes(struct ubifs_info *c)
++{
++	struct ubifs_cnode *cnode, *cnext;
++
++	cnext = c->lpt_cnext;
++	if (!cnext)
++		return;
++	/*
++	 * Walk the cnext list until it comes back round to its head. The
++	 * next pointer is fetched before the current cnode may be freed.
++	 */
++	do {
++		cnode = cnext;
++		cnext = cnode->cnext;
++		if (test_bit(OBSOLETE_CNODE, &cnode->flags))
++			kfree(cnode);
++		else
++			/* Still in use: just take it off the list */
++			cnode->cnext = NULL;
++	} while (cnext != c->lpt_cnext);
++	c->lpt_cnext = NULL;
++}
++
++/**
++ * ubifs_lpt_end_commit - finish the commit operation.
++ * @c: the UBIFS file-system description object
++ *
++ * This function has to be called when the commit operation finishes. It
++ * flushes the changes which were "frozen" by 'ubifs_lpt_start_commit()' to
++ * the media. Returns zero in case of success and a negative error code in case
++ * of failure.
++ */
++int ubifs_lpt_end_commit(struct ubifs_info *c)
++{
++	int err;
++
++	dbg_lp("");
++
++	/* An empty cnext list means there is nothing to write */
++	if (!c->lpt_cnext)
++		return 0;
++
++	err = write_cnodes(c);
++	if (err)
++		return err;
++
++	/* The cnext list is only taken apart under lp_mutex */
++	mutex_lock(&c->lp_mutex);
++	free_obsolete_cnodes(c);
++	mutex_unlock(&c->lp_mutex);
++
++	return 0;
++}
++
++/**
++ * ubifs_lpt_post_commit - post commit LPT trivial GC and LPT GC.
++ * @c: UBIFS file-system description object
++ *
++ * LPT trivial GC is completed after a commit. Also LPT GC is done after a
++ * commit for the "big" LPT model.
++ *
++ * Returns zero in case of success and a negative error code in case of
++ * failure.
++ */
++int ubifs_lpt_post_commit(struct ubifs_info *c)
++{
++	int err;
++
++	mutex_lock(&c->lp_mutex);
++	err = lpt_tgc_end(c);
++	if (err)
++		goto out;
++	if (c->big_lpt)
++		while (need_write_all(c)) {
++			/*
++			 * 'lpt_gc()' takes lp_mutex itself, so it is dropped
++			 * around the call; on failure the mutex is not held
++			 * and the function returns directly.
++			 */
++			mutex_unlock(&c->lp_mutex);
++			err = lpt_gc(c);
++			if (err)
++				return err;
++			mutex_lock(&c->lp_mutex);
++		}
++out:
++	mutex_unlock(&c->lp_mutex);
++	return err;
++}
++
++/**
++ * first_nnode - find the first nnode in memory.
++ * @c: UBIFS file-system description object
++ * @hght: height of tree where nnode found is returned here
++ *
++ * This function returns a pointer to the nnode found or %NULL if no nnode is
++ * found. 
This function is a helper to 'ubifs_lpt_free()'.
++ */
++static struct ubifs_nnode *first_nnode(struct ubifs_info *c, int *hght)
++{
++	struct ubifs_nnode *nnode;
++	int h, i, found;
++
++	nnode = c->nroot;
++	*hght = 0;
++	if (!nnode)
++		return NULL;
++	/*
++	 * Descend through the first in-memory child at each level and stop
++	 * as soon as a level has no in-memory child.
++	 */
++	for (h = 1; h < c->lpt_hght; h++) {
++		found = 0;
++		for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
++			if (nnode->nbranch[i].nnode) {
++				found = 1;
++				nnode = nnode->nbranch[i].nnode;
++				*hght = h;
++				break;
++			}
++		}
++		if (!found)
++			break;
++	}
++	return nnode;
++}
++
++/**
++ * next_nnode - find the next nnode in memory.
++ * @c: UBIFS file-system description object
++ * @nnode: nnode from which to start.
++ * @hght: height of tree where nnode is, is passed and returned here
++ *
++ * This function returns a pointer to the nnode found or %NULL if no nnode is
++ * found. This function is a helper to 'ubifs_lpt_free()'.
++ */
++static struct ubifs_nnode *next_nnode(struct ubifs_info *c,
++				      struct ubifs_nnode *nnode, int *hght)
++{
++	struct ubifs_nnode *parent;
++	int iip, h, i, found;
++
++	/* An nnode without a parent ends the walk */
++	parent = nnode->parent;
++	if (!parent)
++		return NULL;
++	/* Last slot of the parent: the parent itself comes next */
++	if (nnode->iip == UBIFS_LPT_FANOUT - 1) {
++		*hght -= 1;
++		return parent;
++	}
++	/* Look for a later sibling that is in memory */
++	for (iip = nnode->iip + 1; iip < UBIFS_LPT_FANOUT; iip++) {
++		nnode = parent->nbranch[iip].nnode;
++		if (nnode)
++			break;
++	}
++	if (!nnode) {
++		/* No later sibling in memory, so move up to the parent */
++		*hght -= 1;
++		return parent;
++	}
++	/*
++	 * Descend from that sibling through the first in-memory child at
++	 * each level, as 'first_nnode()' does from the root.
++	 */
++	for (h = *hght + 1; h < c->lpt_hght; h++) {
++		found = 0;
++		for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
++			if (nnode->nbranch[i].nnode) {
++				found = 1;
++				nnode = nnode->nbranch[i].nnode;
++				*hght = h;
++				break;
++			}
++		}
++		if (!found)
++			break;
++	}
++	return nnode;
++}
++
++/**
++ * ubifs_lpt_free - free resources owned by the LPT.
++ * @c: UBIFS file-system description object ++ * @wr_only: free only resources used for writing ++ */ ++void ubifs_lpt_free(struct ubifs_info *c, int wr_only) ++{ ++ struct ubifs_nnode *nnode; ++ int i, hght; ++ ++ /* Free write-only things first */ ++ ++ free_obsolete_cnodes(c); /* Leftover from a failed commit */ ++ ++ vfree(c->ltab_cmt); ++ c->ltab_cmt = NULL; ++ vfree(c->lpt_buf); ++ c->lpt_buf = NULL; ++ kfree(c->lsave); ++ c->lsave = NULL; ++ ++ if (wr_only) ++ return; ++ ++ /* Now free the rest */ ++ ++ nnode = first_nnode(c, &hght); ++ while (nnode) { ++ for (i = 0; i < UBIFS_LPT_FANOUT; i++) ++ kfree(nnode->nbranch[i].nnode); ++ nnode = next_nnode(c, nnode, &hght); ++ } ++ for (i = 0; i < LPROPS_HEAP_CNT; i++) ++ kfree(c->lpt_heap[i].arr); ++ kfree(c->dirty_idx.arr); ++ kfree(c->nroot); ++ vfree(c->ltab); ++ kfree(c->lpt_nod_buf); ++} ++ ++#ifdef CONFIG_UBIFS_FS_DEBUG ++ ++/** ++ * dbg_is_all_ff - determine if a buffer contains only 0xFF bytes. ++ * @buf: buffer ++ * @len: buffer length ++ */ ++static int dbg_is_all_ff(uint8_t *buf, int len) ++{ ++ int i; ++ ++ for (i = 0; i < len; i++) ++ if (buf[i] != 0xff) ++ return 0; ++ return 1; ++} ++ ++/** ++ * dbg_is_nnode_dirty - determine if a nnode is dirty. 
++ * @c: the UBIFS file-system description object ++ * @lnum: LEB number where nnode was written ++ * @offs: offset where nnode was written ++ */ ++static int dbg_is_nnode_dirty(struct ubifs_info *c, int lnum, int offs) ++{ ++ struct ubifs_nnode *nnode; ++ int hght; ++ ++ /* Entire tree is in memory so first_nnode / next_nnode are OK */ ++ nnode = first_nnode(c, &hght); ++ for (; nnode; nnode = next_nnode(c, nnode, &hght)) { ++ struct ubifs_nbranch *branch; ++ ++ cond_resched(); ++ if (nnode->parent) { ++ branch = &nnode->parent->nbranch[nnode->iip]; ++ if (branch->lnum != lnum || branch->offs != offs) ++ continue; ++ if (test_bit(DIRTY_CNODE, &nnode->flags)) ++ return 1; ++ return 0; ++ } else { ++ if (c->lpt_lnum != lnum || c->lpt_offs != offs) ++ continue; ++ if (test_bit(DIRTY_CNODE, &nnode->flags)) ++ return 1; ++ return 0; ++ } ++ } ++ return 1; ++} ++ ++/** ++ * dbg_is_pnode_dirty - determine if a pnode is dirty. ++ * @c: the UBIFS file-system description object ++ * @lnum: LEB number where pnode was written ++ * @offs: offset where pnode was written ++ */ ++static int dbg_is_pnode_dirty(struct ubifs_info *c, int lnum, int offs) ++{ ++ int i, cnt; ++ ++ cnt = DIV_ROUND_UP(c->main_lebs, UBIFS_LPT_FANOUT); ++ for (i = 0; i < cnt; i++) { ++ struct ubifs_pnode *pnode; ++ struct ubifs_nbranch *branch; ++ ++ cond_resched(); ++ pnode = pnode_lookup(c, i); ++ if (IS_ERR(pnode)) ++ return PTR_ERR(pnode); ++ branch = &pnode->parent->nbranch[pnode->iip]; ++ if (branch->lnum != lnum || branch->offs != offs) ++ continue; ++ if (test_bit(DIRTY_CNODE, &pnode->flags)) ++ return 1; ++ return 0; ++ } ++ return 1; ++} ++ ++/** ++ * dbg_is_ltab_dirty - determine if a ltab node is dirty. 
++ * @c: the UBIFS file-system description object ++ * @lnum: LEB number where ltab node was written ++ * @offs: offset where ltab node was written ++ */ ++static int dbg_is_ltab_dirty(struct ubifs_info *c, int lnum, int offs) ++{ ++ if (lnum != c->ltab_lnum || offs != c->ltab_offs) ++ return 1; ++ return (c->lpt_drty_flgs & LTAB_DIRTY) != 0; ++} ++ ++/** ++ * dbg_is_lsave_dirty - determine if a lsave node is dirty. ++ * @c: the UBIFS file-system description object ++ * @lnum: LEB number where lsave node was written ++ * @offs: offset where lsave node was written ++ */ ++static int dbg_is_lsave_dirty(struct ubifs_info *c, int lnum, int offs) ++{ ++ if (lnum != c->lsave_lnum || offs != c->lsave_offs) ++ return 1; ++ return (c->lpt_drty_flgs & LSAVE_DIRTY) != 0; ++} ++ ++/** ++ * dbg_is_node_dirty - determine if a node is dirty. ++ * @c: the UBIFS file-system description object ++ * @node_type: node type ++ * @lnum: LEB number where node was written ++ * @offs: offset where node was written ++ */ ++static int dbg_is_node_dirty(struct ubifs_info *c, int node_type, int lnum, ++ int offs) ++{ ++ switch (node_type) { ++ case UBIFS_LPT_NNODE: ++ return dbg_is_nnode_dirty(c, lnum, offs); ++ case UBIFS_LPT_PNODE: ++ return dbg_is_pnode_dirty(c, lnum, offs); ++ case UBIFS_LPT_LTAB: ++ return dbg_is_ltab_dirty(c, lnum, offs); ++ case UBIFS_LPT_LSAVE: ++ return dbg_is_lsave_dirty(c, lnum, offs); ++ } ++ return 1; ++} ++ ++/** ++ * dbg_check_ltab_lnum - check the ltab for a LPT LEB number. ++ * @c: the UBIFS file-system description object ++ * @lnum: LEB number where node was written ++ * @offs: offset where node was written ++ * ++ * This function returns %0 on success and a negative error code on failure. 
++ */
++static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum)
++{
++	int err, len = c->leb_size, dirty = 0, node_type, node_num, node_len;
++	int ret;
++	void *buf = c->dbg->buf;
++
++	/* The check only runs when lprops checking is switched on */
++	if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
++		return 0;
++
++	dbg_lp("LEB %d", lnum);
++	err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size);
++	if (err) {
++		dbg_msg("ubi_read failed, LEB %d, error %d", lnum, err);
++		return err;
++	}
++	/*
++	 * Scan the LEB node by node. @len is the number of bytes left, so the
++	 * current offset is always c->leb_size - len.
++	 */
++	while (1) {
++		if (!is_a_node(c, buf, len)) {
++			int i, pad_len;
++
++			/* Alignment padding is counted as dirty space */
++			pad_len = get_pad_len(c, buf, len);
++			if (pad_len) {
++				buf += pad_len;
++				len -= pad_len;
++				dirty += pad_len;
++				continue;
++			}
++			/*
++			 * End of the nodes: what remains must be empty and
++			 * the totals must agree with the ltab entry. All
++			 * three checks run so every mismatch is reported.
++			 */
++			if (!dbg_is_all_ff(buf, len)) {
++				dbg_msg("invalid empty space in LEB %d at %d",
++					lnum, c->leb_size - len);
++				err = -EINVAL;
++			}
++			i = lnum - c->lpt_first;
++			if (len != c->ltab[i].free) {
++				dbg_msg("invalid free space in LEB %d "
++					"(free %d, expected %d)",
++					lnum, len, c->ltab[i].free);
++				err = -EINVAL;
++			}
++			if (dirty != c->ltab[i].dirty) {
++				dbg_msg("invalid dirty space in LEB %d "
++					"(dirty %d, expected %d)",
++					lnum, dirty, c->ltab[i].dirty);
++				err = -EINVAL;
++			}
++			return err;
++		}
++		node_type = get_lpt_node_type(c, buf, &node_num);
++		node_len = get_lpt_node_len(c, node_type);
++		/*
++		 * NOTE(review): only a result of exactly 1 is acted upon, so
++		 * a negative error code from 'dbg_is_node_dirty()' is treated
++		 * the same as "clean" - confirm this is intended.
++		 */
++		ret = dbg_is_node_dirty(c, node_type, lnum, c->leb_size - len);
++		if (ret == 1)
++			dirty += node_len;
++		buf += node_len;
++		len -= node_len;
++	}
++}
++
++/**
++ * dbg_check_ltab - check the free and dirty space in the ltab.
++ * @c: the UBIFS file-system description object
++ *
++ * This function returns %0 on success and a negative error code on failure.
++ */ ++int dbg_check_ltab(struct ubifs_info *c) ++{ ++ int lnum, err, i, cnt; ++ ++ if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) ++ return 0; ++ ++ /* Bring the entire tree into memory */ ++ cnt = DIV_ROUND_UP(c->main_lebs, UBIFS_LPT_FANOUT); ++ for (i = 0; i < cnt; i++) { ++ struct ubifs_pnode *pnode; ++ ++ pnode = pnode_lookup(c, i); ++ if (IS_ERR(pnode)) ++ return PTR_ERR(pnode); ++ cond_resched(); ++ } ++ ++ /* Check nodes */ ++ err = dbg_check_lpt_nodes(c, (struct ubifs_cnode *)c->nroot, 0, 0); ++ if (err) ++ return err; ++ ++ /* Check each LEB */ ++ for (lnum = c->lpt_first; lnum <= c->lpt_last; lnum++) { ++ err = dbg_check_ltab_lnum(c, lnum); ++ if (err) { ++ dbg_err("failed at LEB %d", lnum); ++ return err; ++ } ++ } ++ ++ dbg_lp("succeeded"); ++ return 0; ++} ++ ++/** ++ * dbg_chk_lpt_free_spc - check LPT free space is enough to write entire LPT. ++ * @c: the UBIFS file-system description object ++ * ++ * This function returns %0 on success and a negative error code on failure. ++ */ ++int dbg_chk_lpt_free_spc(struct ubifs_info *c) ++{ ++ long long free = 0; ++ int i; ++ ++ if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) ++ return 0; ++ ++ for (i = 0; i < c->lpt_lebs; i++) { ++ if (c->ltab[i].tgc || c->ltab[i].cmt) ++ continue; ++ if (i + c->lpt_first == c->nhead_lnum) ++ free += c->leb_size - c->nhead_offs; ++ else if (c->ltab[i].free == c->leb_size) ++ free += c->leb_size; ++ } ++ if (free < c->lpt_sz) { ++ dbg_err("LPT space error: free %lld lpt_sz %lld", ++ free, c->lpt_sz); ++ dbg_dump_lpt_info(c); ++ dbg_dump_lpt_lebs(c); ++ dump_stack(); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++/** ++ * dbg_chk_lpt_sz - check LPT does not write more than LPT size. ++ * @c: the UBIFS file-system description object ++ * @action: what to do ++ * @len: length written ++ * ++ * This function returns %0 on success and a negative error code on failure. 
++ * The @action argument may be one of:
++ *   o %0 - LPT debugging checking starts, initialize debugging variables;
++ *   o %1 - wrote an LPT node, increase LPT size by @len bytes;
++ *   o %2 - switched to a different LEB and wasted @len bytes;
++ *   o %3 - check that we've written the right number of bytes.
++ *   o %4 - wasted @len bytes;
++ */
++int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len)
++{
++	struct ubifs_debug_info *d = c->dbg;
++	long long chk_lpt_sz, lpt_sz;
++	int err = 0;
++
++	if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
++		return 0;
++
++	switch (action) {
++	case 0:
++		/* Reset all counters, then sanity-check the dirty node counts */
++		d->chk_lpt_sz = 0;
++		d->chk_lpt_sz2 = 0;
++		d->chk_lpt_lebs = 0;
++		d->chk_lpt_wastage = 0;
++		if (c->dirty_pn_cnt > c->pnode_cnt) {
++			dbg_err("dirty pnodes %d exceed max %d",
++				c->dirty_pn_cnt, c->pnode_cnt);
++			err = -EINVAL;
++		}
++		if (c->dirty_nn_cnt > c->nnode_cnt) {
++			dbg_err("dirty nnodes %d exceed max %d",
++				c->dirty_nn_cnt, c->nnode_cnt);
++			err = -EINVAL;
++		}
++		return err;
++	case 1:
++		d->chk_lpt_sz += len;
++		return 0;
++	case 2:
++		/* A LEB switch: @len counts as both written and wasted */
++		d->chk_lpt_sz += len;
++		d->chk_lpt_wastage += len;
++		d->chk_lpt_lebs += 1;
++		return 0;
++	case 3:
++		/*
++		 * Expected total: one full LEB per recorded LEB switch, plus
++		 * the difference between @len and the nhead offset.
++		 */
++		chk_lpt_sz = c->leb_size;
++		chk_lpt_sz *= d->chk_lpt_lebs;
++		chk_lpt_sz += len - c->nhead_offs;
++		if (d->chk_lpt_sz != chk_lpt_sz) {
++			dbg_err("LPT wrote %lld but space used was %lld",
++				d->chk_lpt_sz, chk_lpt_sz);
++			err = -EINVAL;
++		}
++		if (d->chk_lpt_sz > c->lpt_sz) {
++			dbg_err("LPT wrote %lld but lpt_sz is %lld",
++				d->chk_lpt_sz, c->lpt_sz);
++			err = -EINVAL;
++		}
++		/*
++		 * 'chk_lpt_sz2' and 'new_nhead_offs' are the values saved by
++		 * the previous action-%3 call (see the end of this case); they
++		 * are compared only if 'chk_lpt_sz2' is non-zero.
++		 */
++		if (d->chk_lpt_sz2 && d->chk_lpt_sz != d->chk_lpt_sz2) {
++			dbg_err("LPT layout size %lld but wrote %lld",
++				d->chk_lpt_sz, d->chk_lpt_sz2);
++			err = -EINVAL;
++		}
++		if (d->chk_lpt_sz2 && d->new_nhead_offs != len) {
++			dbg_err("LPT new nhead offs: expected %d was %d",
++				d->new_nhead_offs, len);
++			err = -EINVAL;
++		}
++		/* Size of all pnodes, nnodes, the ltab and (big LPT) the lsave */
++		lpt_sz = (long long)c->pnode_cnt * c->pnode_sz;
++		lpt_sz += (long long)c->nnode_cnt * c->nnode_sz;
++		lpt_sz += c->ltab_sz;
++		if (c->big_lpt)
++			lpt_sz += c->lsave_sz;
++		/*
++		 * NOTE(review): the message below says "+ waste" although the
++		 * comparison subtracts the wastage from the written size.
++		 */
++		if (d->chk_lpt_sz - d->chk_lpt_wastage > lpt_sz) {
++			dbg_err("LPT chk_lpt_sz %lld + waste %lld exceeds %lld",
++				d->chk_lpt_sz, d->chk_lpt_wastage, lpt_sz);
++			err = -EINVAL;
++		}
++		if (err) {
++			dbg_dump_lpt_info(c);
++			dbg_dump_lpt_lebs(c);
++			dump_stack();
++		}
++		/* Save this pass for the next action-%3 call and reset */
++		d->chk_lpt_sz2 = d->chk_lpt_sz;
++		d->chk_lpt_sz = 0;
++		d->chk_lpt_wastage = 0;
++		d->chk_lpt_lebs = 0;
++		d->new_nhead_offs = len;
++		return err;
++	case 4:
++		d->chk_lpt_sz += len;
++		d->chk_lpt_wastage += len;
++		return 0;
++	default:
++		return -EINVAL;
++	}
++}
++
++/**
++ * dump_lpt_leb - dump an LPT LEB.
++ * @c: UBIFS file-system description object
++ * @lnum: LEB number to dump
++ *
++ * This function dumps an LEB from LPT area. Nodes in this area are very
++ * different to nodes in the main area (e.g., they do not have common headers,
++ * they do not have 8-byte alignments, etc), so we have a separate function to
++ * dump LPT area LEBs. Note, LPT has to be locked by the caller.
++ */ ++static void dump_lpt_leb(const struct ubifs_info *c, int lnum) ++{ ++ int err, len = c->leb_size, node_type, node_num, node_len, offs; ++ void *buf = c->dbg->buf; ++ ++ printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", ++ current->pid, lnum); ++ err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); ++ if (err) { ++ ubifs_err("cannot read LEB %d, error %d", lnum, err); ++ return; ++ } ++ while (1) { ++ offs = c->leb_size - len; ++ if (!is_a_node(c, buf, len)) { ++ int pad_len; ++ ++ pad_len = get_pad_len(c, buf, len); ++ if (pad_len) { ++ printk(KERN_DEBUG "LEB %d:%d, pad %d bytes\n", ++ lnum, offs, pad_len); ++ buf += pad_len; ++ len -= pad_len; ++ continue; ++ } ++ if (len) ++ printk(KERN_DEBUG "LEB %d:%d, free %d bytes\n", ++ lnum, offs, len); ++ break; ++ } ++ ++ node_type = get_lpt_node_type(c, buf, &node_num); ++ switch (node_type) { ++ case UBIFS_LPT_PNODE: ++ { ++ node_len = c->pnode_sz; ++ if (c->big_lpt) ++ printk(KERN_DEBUG "LEB %d:%d, pnode num %d\n", ++ lnum, offs, node_num); ++ else ++ printk(KERN_DEBUG "LEB %d:%d, pnode\n", ++ lnum, offs); ++ break; ++ } ++ case UBIFS_LPT_NNODE: ++ { ++ int i; ++ struct ubifs_nnode nnode; ++ ++ node_len = c->nnode_sz; ++ if (c->big_lpt) ++ printk(KERN_DEBUG "LEB %d:%d, nnode num %d, ", ++ lnum, offs, node_num); ++ else ++ printk(KERN_DEBUG "LEB %d:%d, nnode, ", ++ lnum, offs); ++ err = ubifs_unpack_nnode(c, buf, &nnode); ++ for (i = 0; i < UBIFS_LPT_FANOUT; i++) { ++ printk(KERN_CONT "%d:%d", nnode.nbranch[i].lnum, ++ nnode.nbranch[i].offs); ++ if (i != UBIFS_LPT_FANOUT - 1) ++ printk(KERN_CONT ", "); ++ } ++ printk(KERN_CONT "\n"); ++ break; ++ } ++ case UBIFS_LPT_LTAB: ++ node_len = c->ltab_sz; ++ printk(KERN_DEBUG "LEB %d:%d, ltab\n", ++ lnum, offs); ++ break; ++ case UBIFS_LPT_LSAVE: ++ node_len = c->lsave_sz; ++ printk(KERN_DEBUG "LEB %d:%d, lsave len\n", lnum, offs); ++ break; ++ default: ++ ubifs_err("LPT node type %d not recognized", node_type); ++ return; ++ } ++ ++ buf += node_len; ++ len -= 
node_len; ++ } ++ ++ printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n", ++ current->pid, lnum); ++} ++ ++/** ++ * dbg_dump_lpt_lebs - dump LPT lebs. ++ * @c: UBIFS file-system description object ++ * ++ * This function dumps all LPT LEBs. The caller has to make sure the LPT is ++ * locked. ++ */ ++void dbg_dump_lpt_lebs(const struct ubifs_info *c) ++{ ++ int i; ++ ++ printk(KERN_DEBUG "(pid %d) start dumping all LPT LEBs\n", ++ current->pid); ++ for (i = 0; i < c->lpt_lebs; i++) ++ dump_lpt_leb(c, i + c->lpt_first); ++ printk(KERN_DEBUG "(pid %d) finish dumping all LPT LEBs\n", ++ current->pid); ++} ++ ++#endif /* CONFIG_UBIFS_FS_DEBUG */ +diff -Nurd linux-2.6.24/fs/ubifs/master.c ubifs-v2.6.24/fs/ubifs/master.c +--- linux-2.6.24/fs/ubifs/master.c 1970-01-01 02:00:00.000000000 +0200 ++++ ubifs-v2.6.24/fs/ubifs/master.c 2009-04-07 17:14:47.000000000 +0200 +@@ -0,0 +1,387 @@ ++/* ++ * This file is part of UBIFS. ++ * ++ * Copyright (C) 2006-2008 Nokia Corporation. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published by ++ * the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, write to the Free Software Foundation, Inc., 51 ++ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ * ++ * Authors: Artem Bityutskiy (Битюцкий Артём) ++ * Adrian Hunter ++ */ ++ ++/* This file implements reading and writing the master node */ ++ ++#include "ubifs.h" ++ ++/** ++ * scan_for_master - search the valid master node. 
++ * @c: UBIFS file-system description object
++ *
++ * This function scans the master node LEBs and search for the latest master
++ * node. Returns zero in case of success and a negative error code in case of
++ * failure.
++ */
++static int scan_for_master(struct ubifs_info *c)
++{
++	struct ubifs_scan_leb *sleb;
++	struct ubifs_scan_node *snod;
++	int lnum, offs = 0, nodes_cnt;
++
++	lnum = UBIFS_MST_LNUM;
++
++	/* First master LEB: keep a copy of the last node found in it */
++	sleb = ubifs_scan(c, lnum, 0, c->sbuf);
++	if (IS_ERR(sleb))
++		return PTR_ERR(sleb);
++	nodes_cnt = sleb->nodes_cnt;
++	if (nodes_cnt > 0) {
++		snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node,
++				  list);
++		if (snod->type != UBIFS_MST_NODE)
++			goto out;
++		memcpy(c->mst_node, snod->node, snod->len);
++		offs = snod->offs;
++	}
++	ubifs_scan_destroy(sleb);
++
++	lnum += 1;
++
++	/*
++	 * Second master LEB: it must hold the same, non-zero number of nodes,
++	 * and its last node must be a master node at the same offset as the
++	 * one copied above.
++	 */
++	sleb = ubifs_scan(c, lnum, 0, c->sbuf);
++	if (IS_ERR(sleb))
++		return PTR_ERR(sleb);
++	if (sleb->nodes_cnt != nodes_cnt)
++		goto out;
++	if (!sleb->nodes_cnt)
++		goto out;
++	snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, list);
++	if (snod->type != UBIFS_MST_NODE)
++		goto out;
++	if (snod->offs != offs)
++		goto out;
++	/* Compare the two copies; the first UBIFS_CH_SZ bytes are skipped */
++	if (memcmp((void *)c->mst_node + UBIFS_CH_SZ,
++		   (void *)snod->node + UBIFS_CH_SZ,
++		   UBIFS_MST_NODE_SZ - UBIFS_CH_SZ))
++		goto out;
++	c->mst_offs = offs;
++	ubifs_scan_destroy(sleb);
++	return 0;
++
++out:
++	/* Any mismatch is reported as -EINVAL after freeing the scan data */
++	ubifs_scan_destroy(sleb);
++	return -EINVAL;
++}
++
++/**
++ * validate_master - validate master node.
++ * @c: UBIFS file-system description object
++ *
++ * This function validates data which was read from master node. Returns zero
++ * if the data is all right and %-EINVAL if not.
++ */
++static int validate_master(const struct ubifs_info *c)
++{
++	long long main_sz;
++	/*
++	 * 'err' is only a check number (1..23) printed in the error message;
++	 * the function itself returns -EINVAL for any failed check.
++	 */
++	int err;
++
++	if (c->max_sqnum >= SQNUM_WATERMARK) {
++		err = 1;
++		goto out;
++	}
++
++	if (c->cmt_no >= c->max_sqnum) {
++		err = 2;
++		goto out;
++	}
++
++	if (c->highest_inum >= INUM_WATERMARK) {
++		err = 3;
++		goto out;
++	}
++
++	/* Log head: inside the log LEBs, inside the LEB, min. I/O aligned */
++	if (c->lhead_lnum < UBIFS_LOG_LNUM ||
++	    c->lhead_lnum >= UBIFS_LOG_LNUM + c->log_lebs ||
++	    c->lhead_offs < 0 || c->lhead_offs >= c->leb_size ||
++	    c->lhead_offs & (c->min_io_size - 1)) {
++		err = 4;
++		goto out;
++	}
++
++	/* Index root: in the main area, inside the LEB, 8-byte aligned */
++	if (c->zroot.lnum >= c->leb_cnt || c->zroot.lnum < c->main_first ||
++	    c->zroot.offs >= c->leb_size || c->zroot.offs & 7) {
++		err = 5;
++		goto out;
++	}
++
++	/* Index root length must be within the index node length range */
++	if (c->zroot.len < c->ranges[UBIFS_IDX_NODE].min_len ||
++	    c->zroot.len > c->ranges[UBIFS_IDX_NODE].max_len) {
++		err = 6;
++		goto out;
++	}
++
++	if (c->gc_lnum >= c->leb_cnt || c->gc_lnum < c->main_first) {
++		err = 7;
++		goto out;
++	}
++
++	/* Index head: in the main area, min. I/O and 8-byte aligned */
++	if (c->ihead_lnum >= c->leb_cnt || c->ihead_lnum < c->main_first ||
++	    c->ihead_offs % c->min_io_size || c->ihead_offs < 0 ||
++	    c->ihead_offs > c->leb_size || c->ihead_offs & 7) {
++		err = 8;
++		goto out;
++	}
++
++	/* Size of the main area in bytes */
++	main_sz = (long long)c->main_lebs * c->leb_size;
++	if (c->old_idx_sz & 7 || c->old_idx_sz >= main_sz) {
++		err = 9;
++		goto out;
++	}
++
++	/* Checks 10-13: LPT positions must lie inside the LPT LEB range */
++	if (c->lpt_lnum < c->lpt_first || c->lpt_lnum > c->lpt_last ||
++	    c->lpt_offs < 0 || c->lpt_offs + c->nnode_sz > c->leb_size) {
++		err = 10;
++		goto out;
++	}
++
++	if (c->nhead_lnum < c->lpt_first || c->nhead_lnum > c->lpt_last ||
++	    c->nhead_offs < 0 || c->nhead_offs % c->min_io_size ||
++	    c->nhead_offs > c->leb_size) {
++		err = 11;
++		goto out;
++	}
++
++	if (c->ltab_lnum < c->lpt_first || c->ltab_lnum > c->lpt_last ||
++	    c->ltab_offs < 0 ||
++	    c->ltab_offs + c->ltab_sz > c->leb_size) {
++		err = 12;
++		goto out;
++	}
++
++	/* The lsave position is only checked when 'big_lpt' is set */
++	if (c->big_lpt && (c->lsave_lnum < c->lpt_first ||
++	    c->lsave_lnum > c->lpt_last || c->lsave_offs < 0 ||
++	    c->lsave_offs + c->lsave_sz > c->leb_size)) {
++		err = 13;
++		goto out;
++	}
++
++	if (c->lscan_lnum < c->main_first || c->lscan_lnum >= c->leb_cnt) {
++		err = 14;
++		goto out;
++	}
++
++	/* Checks 15-23: LEB statistics ('c->lst') */
++	if (c->lst.empty_lebs < 0 || c->lst.empty_lebs > c->main_lebs - 2) {
++		err = 15;
++		goto out;
++	}
++
++	if (c->lst.idx_lebs < 0 || c->lst.idx_lebs > c->main_lebs - 1) {
++		err = 16;
++		goto out;
++	}
++
++	if (c->lst.total_free < 0 || c->lst.total_free > main_sz ||
++	    c->lst.total_free & 7) {
++		err = 17;
++		goto out;
++	}
++
++	if (c->lst.total_dirty < 0 || (c->lst.total_dirty & 7)) {
++		err = 18;
++		goto out;
++	}
++
++	if (c->lst.total_used < 0 || (c->lst.total_used & 7)) {
++		err = 19;
++		goto out;
++	}
++
++	/* Free, dirty and used space together must fit in the main area */
++	if (c->lst.total_free + c->lst.total_dirty +
++	    c->lst.total_used > main_sz) {
++		err = 20;
++		goto out;
++	}
++
++	if (c->lst.total_dead + c->lst.total_dark +
++	    c->lst.total_used + c->old_idx_sz > main_sz) {
++		err = 21;
++		goto out;
++	}
++
++	/* Dead and dark space can each be at most free plus dirty space */
++	if (c->lst.total_dead < 0 ||
++	    c->lst.total_dead > c->lst.total_free + c->lst.total_dirty ||
++	    c->lst.total_dead & 7) {
++		err = 22;
++		goto out;
++	}
++
++	if (c->lst.total_dark < 0 ||
++	    c->lst.total_dark > c->lst.total_free + c->lst.total_dirty ||
++	    c->lst.total_dark & 7) {
++		err = 23;
++		goto out;
++	}
++
++	return 0;
++
++out:
++	ubifs_err("bad master node at offset %d error %d", c->mst_offs, err);
++	dbg_dump_node(c, c->mst_node);
++	return -EINVAL;
++}
++
++/**
++ * ubifs_read_master - read master node.
++ * @c: UBIFS file-system description object
++ *
++ * This function finds and reads the master node during file-system mount. If
++ * the flash is empty, it creates default master node as well. Returns zero in
++ * case of success and a negative error code in case of failure.
++ */
++int ubifs_read_master(struct ubifs_info *c)
++{
++	int err, old_leb_cnt;
++
++	c->mst_node = kzalloc(c->mst_node_alsz, GFP_KERNEL);
++	if (!c->mst_node)
++		return -ENOMEM;
++
++	/* If the scan fails, fall back to master node recovery */
++	err = scan_for_master(c);
++	if (err) {
++		err = ubifs_recover_master_node(c);
++		if (err)
++			/*
++			 * Note, we do not free 'c->mst_node' here because the
++			 * unmount routine will take care of this.
++			 */
++			return err;
++	}
++
++	/* Make sure that the recovery flag is clear */
++	c->mst_node->flags &= cpu_to_le32(~UBIFS_MST_RCVRY);
++
++	/* Copy the little-endian master node fields into 'c' in CPU order */
++	c->max_sqnum = le64_to_cpu(c->mst_node->ch.sqnum);
++	c->highest_inum = le64_to_cpu(c->mst_node->highest_inum);
++	c->cmt_no = le64_to_cpu(c->mst_node->cmt_no);
++	c->zroot.lnum = le32_to_cpu(c->mst_node->root_lnum);
++	c->zroot.offs = le32_to_cpu(c->mst_node->root_offs);
++	c->zroot.len = le32_to_cpu(c->mst_node->root_len);
++	c->lhead_lnum = le32_to_cpu(c->mst_node->log_lnum);
++	c->gc_lnum = le32_to_cpu(c->mst_node->gc_lnum);
++	c->ihead_lnum = le32_to_cpu(c->mst_node->ihead_lnum);
++	c->ihead_offs = le32_to_cpu(c->mst_node->ihead_offs);
++	c->old_idx_sz = le64_to_cpu(c->mst_node->index_size);
++	c->lpt_lnum = le32_to_cpu(c->mst_node->lpt_lnum);
++	c->lpt_offs = le32_to_cpu(c->mst_node->lpt_offs);
++	c->nhead_lnum = le32_to_cpu(c->mst_node->nhead_lnum);
++	c->nhead_offs = le32_to_cpu(c->mst_node->nhead_offs);
++	c->ltab_lnum = le32_to_cpu(c->mst_node->ltab_lnum);
++	c->ltab_offs = le32_to_cpu(c->mst_node->ltab_offs);
++	c->lsave_lnum = le32_to_cpu(c->mst_node->lsave_lnum);
++	c->lsave_offs = le32_to_cpu(c->mst_node->lsave_offs);
++	c->lscan_lnum = le32_to_cpu(c->mst_node->lscan_lnum);
++	c->lst.empty_lebs = le32_to_cpu(c->mst_node->empty_lebs);
++	c->lst.idx_lebs = le32_to_cpu(c->mst_node->idx_lebs);
++	old_leb_cnt = le32_to_cpu(c->mst_node->leb_cnt);
++	c->lst.total_free = le64_to_cpu(c->mst_node->total_free);
++	c->lst.total_dirty = le64_to_cpu(c->mst_node->total_dirty);
++	c->lst.total_used = le64_to_cpu(c->mst_node->total_used);
++	c->lst.total_dead = le64_to_cpu(c->mst_node->total_dead);
++	c->lst.total_dark = le64_to_cpu(c->mst_node->total_dark);
++
++	c->calc_idx_sz = c->old_idx_sz;
++
++	if (c->mst_node->flags & cpu_to_le32(UBIFS_MST_NO_ORPHS))
++		c->no_orphs = 1;
++
++	if (old_leb_cnt != c->leb_cnt) {
++		/* The file system has been resized */
++		int growth = c->leb_cnt - old_leb_cnt;
++
++		/* Only growth is accepted; shrinking is rejected here */
++		if (c->leb_cnt < old_leb_cnt ||
++		    c->leb_cnt < UBIFS_MIN_LEB_CNT) {
++			ubifs_err("bad leb_cnt on master node");
++			dbg_dump_node(c, c->mst_node);
++			return -EINVAL;
++		}
++
++		dbg_mnt("Auto resizing (master) from %d LEBs to %d LEBs",
++			old_leb_cnt, c->leb_cnt);
++		/* Every added LEB is accounted as empty, free and dark */
++		c->lst.empty_lebs += growth;
++		c->lst.total_free += growth * (long long)c->leb_size;
++		c->lst.total_dark += growth * (long long)c->dark_wm;
++
++		/*
++		 * Reflect changes back onto the master node. N.B. the master
++		 * node gets written immediately whenever mounting (or
++		 * remounting) in read-write mode, so we do not need to write it
++		 * here.
++		 */
++		c->mst_node->leb_cnt = cpu_to_le32(c->leb_cnt);
++		c->mst_node->empty_lebs = cpu_to_le32(c->lst.empty_lebs);
++		c->mst_node->total_free = cpu_to_le64(c->lst.total_free);
++		c->mst_node->total_dark = cpu_to_le64(c->lst.total_dark);
++	}
++
++	/* Validation runs after the resize adjustments above */
++	err = validate_master(c);
++	if (err)
++		return err;
++
++	err = dbg_old_index_check_init(c, &c->zroot);
++
++	return err;
++}
++
++/**
++ * ubifs_write_master - write master node.
++ * @c: UBIFS file-system description object
++ *
++ * This function writes the master node. The caller has to take the
++ * @c->mst_mutex lock before calling this function. Returns zero in case of
++ * success and a negative error code in case of failure. The master node is
++ * written twice to enable recovery.
++ */ ++int ubifs_write_master(struct ubifs_info *c) ++{ ++ int err, lnum, offs, len; ++ ++ if (c->ro_media) ++ return -EROFS; ++ ++ lnum = UBIFS_MST_LNUM; ++ offs = c->mst_offs + c->mst_node_alsz; ++ len = UBIFS_MST_NODE_SZ; ++ ++ if (offs + UBIFS_MST_NODE_SZ > c->leb_size) { ++ err = ubifs_leb_unmap(c, lnum); ++ if (err) ++ return err; ++ offs = 0; ++ } ++ ++ c->mst_offs = offs; ++ c->mst_node->highest_inum = cpu_to_le64(c->highest_inum); ++ ++ err = ubifs_write_node(c, c->mst_node, len, lnum, offs, UBI_SHORTTERM); ++ if (err) ++ return err; ++ ++ lnum += 1; ++ ++ if (offs == 0) { ++ err = ubifs_leb_unmap(c, lnum); ++ if (err) ++ return err; ++ } ++ err = ubifs_write_node(c, c->mst_node, len, lnum, offs, UBI_SHORTTERM); ++ ++ return err; ++} +diff -Nurd linux-2.6.24/fs/ubifs/misc.h ubifs-v2.6.24/fs/ubifs/misc.h +--- linux-2.6.24/fs/ubifs/misc.h 1970-01-01 02:00:00.000000000 +0200 ++++ ubifs-v2.6.24/fs/ubifs/misc.h 2009-04-07 17:14:47.000000000 +0200 +@@ -0,0 +1,340 @@ ++/* ++ * This file is part of UBIFS. ++ * ++ * Copyright (C) 2006-2008 Nokia Corporation ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published by ++ * the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, write to the Free Software Foundation, Inc., 51 ++ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ * ++ * Authors: Artem Bityutskiy (Битюцкий Артём) ++ * Adrian Hunter ++ */ ++ ++/* ++ * This file contains miscellaneous helper functions. 
++ */ ++ ++#ifndef __UBIFS_MISC_H__ ++#define __UBIFS_MISC_H__ ++ ++/** ++ * ubifs_zn_dirty - check if znode is dirty. ++ * @znode: znode to check ++ * ++ * This helper function returns %1 if @znode is dirty and %0 otherwise. ++ */ ++static inline int ubifs_zn_dirty(const struct ubifs_znode *znode) ++{ ++ return !!test_bit(DIRTY_ZNODE, &znode->flags); ++} ++ ++/** ++ * ubifs_wake_up_bgt - wake up background thread. ++ * @c: UBIFS file-system description object ++ */ ++static inline void ubifs_wake_up_bgt(struct ubifs_info *c) ++{ ++ if (c->bgt && !c->need_bgt) { ++ c->need_bgt = 1; ++ wake_up_process(c->bgt); ++ } ++} ++ ++/** ++ * ubifs_tnc_find_child - find next child in znode. ++ * @znode: znode to search at ++ * @start: the zbranch index to start at ++ * ++ * This helper function looks for znode child starting at index @start. Returns ++ * the child or %NULL if no children were found. ++ */ ++static inline struct ubifs_znode * ++ubifs_tnc_find_child(struct ubifs_znode *znode, int start) ++{ ++ while (start < znode->child_cnt) { ++ if (znode->zbranch[start].znode) ++ return znode->zbranch[start].znode; ++ start += 1; ++ } ++ ++ return NULL; ++} ++ ++/** ++ * ubifs_inode - get UBIFS inode information by VFS 'struct inode' object. ++ * @inode: the VFS 'struct inode' pointer ++ */ ++static inline struct ubifs_inode *ubifs_inode(const struct inode *inode) ++{ ++ return container_of(inode, struct ubifs_inode, vfs_inode); ++} ++ ++/** ++ * ubifs_compr_present - check if compressor was compiled in. ++ * @compr_type: compressor type to check ++ * ++ * This function returns %1 of compressor of type @compr_type is present, and ++ * %0 if not. ++ */ ++static inline int ubifs_compr_present(int compr_type) ++{ ++ ubifs_assert(compr_type >= 0 && compr_type < UBIFS_COMPR_TYPES_CNT); ++ return !!ubifs_compressors[compr_type]->capi_name; ++} ++ ++/** ++ * ubifs_compr_name - get compressor name string by its type. 
++ * @compr_type: compressor type ++ * ++ * This function returns compressor type string. ++ */ ++static inline const char *ubifs_compr_name(int compr_type) ++{ ++ ubifs_assert(compr_type >= 0 && compr_type < UBIFS_COMPR_TYPES_CNT); ++ return ubifs_compressors[compr_type]->name; ++} ++ ++/** ++ * ubifs_wbuf_sync - synchronize write-buffer. ++ * @wbuf: write-buffer to synchronize ++ * ++ * This is the same as as 'ubifs_wbuf_sync_nolock()' but it does not assume ++ * that the write-buffer is already locked. ++ */ ++static inline int ubifs_wbuf_sync(struct ubifs_wbuf *wbuf) ++{ ++ int err; ++ ++ mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); ++ err = ubifs_wbuf_sync_nolock(wbuf); ++ mutex_unlock(&wbuf->io_mutex); ++ return err; ++} ++ ++/** ++ * ubifs_leb_unmap - unmap an LEB. ++ * @c: UBIFS file-system description object ++ * @lnum: LEB number to unmap ++ * ++ * This function returns %0 on success and a negative error code on failure. ++ */ ++static inline int ubifs_leb_unmap(const struct ubifs_info *c, int lnum) ++{ ++ int err; ++ ++ if (c->ro_media) ++ return -EROFS; ++ err = ubi_leb_unmap(c->ubi, lnum); ++ if (err) { ++ ubifs_err("unmap LEB %d failed, error %d", lnum, err); ++ return err; ++ } ++ ++ return 0; ++} ++ ++/** ++ * ubifs_leb_write - write to a LEB. ++ * @c: UBIFS file-system description object ++ * @lnum: LEB number to write ++ * @buf: buffer to write from ++ * @offs: offset within LEB to write to ++ * @len: length to write ++ * @dtype: data type ++ * ++ * This function returns %0 on success and a negative error code on failure. 
++ */ ++static inline int ubifs_leb_write(const struct ubifs_info *c, int lnum, ++ const void *buf, int offs, int len, int dtype) ++{ ++ int err; ++ ++ if (c->ro_media) ++ return -EROFS; ++ err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype); ++ if (err) { ++ ubifs_err("writing %d bytes at %d:%d, error %d", ++ len, lnum, offs, err); ++ return err; ++ } ++ ++ return 0; ++} ++ ++/** ++ * ubifs_leb_change - atomic LEB change. ++ * @c: UBIFS file-system description object ++ * @lnum: LEB number to write ++ * @buf: buffer to write from ++ * @len: length to write ++ * @dtype: data type ++ * ++ * This function returns %0 on success and a negative error code on failure. ++ */ ++static inline int ubifs_leb_change(const struct ubifs_info *c, int lnum, ++ const void *buf, int len, int dtype) ++{ ++ int err; ++ ++ if (c->ro_media) ++ return -EROFS; ++ err = ubi_leb_change(c->ubi, lnum, buf, len, dtype); ++ if (err) { ++ ubifs_err("changing %d bytes in LEB %d, error %d", ++ len, lnum, err); ++ return err; ++ } ++ ++ return 0; ++} ++ ++/** ++ * ubifs_encode_dev - encode device node IDs. ++ * @dev: UBIFS device node information ++ * @rdev: device IDs to encode ++ * ++ * This is a helper function which encodes major/minor numbers of a device node ++ * into UBIFS device node description. We use standard Linux "new" and "huge" ++ * encodings. ++ */ ++static inline int ubifs_encode_dev(union ubifs_dev_desc *dev, dev_t rdev) ++{ ++ if (new_valid_dev(rdev)) { ++ dev->new = cpu_to_le32(new_encode_dev(rdev)); ++ return sizeof(dev->new); ++ } else { ++ dev->huge = cpu_to_le64(huge_encode_dev(rdev)); ++ return sizeof(dev->huge); ++ } ++} ++ ++/** ++ * ubifs_add_dirt - add dirty space to LEB properties. ++ * @c: the UBIFS file-system description object ++ * @lnum: LEB to add dirty space for ++ * @dirty: dirty space to add ++ * ++ * This is a helper function which increased amount of dirty LEB space. Returns ++ * zero in case of success and a negative error code in case of failure. 
++ */
++static inline int ubifs_add_dirt(struct ubifs_info *c, int lnum, int dirty)
++{
++	return ubifs_update_one_lp(c, lnum, LPROPS_NC, dirty, 0, 0);
++}
++
++/**
++ * ubifs_return_leb - return LEB to lprops.
++ * @c: the UBIFS file-system description object
++ * @lnum: LEB to return
++ *
++ * This helper function cleans the "taken" flag of a logical eraseblock in the
++ * lprops. Returns zero in case of success and a negative error code in case of
++ * failure.
++ */
++static inline int ubifs_return_leb(struct ubifs_info *c, int lnum)
++{
++	return ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
++				   LPROPS_TAKEN, 0);
++}
++
++/**
++ * ubifs_idx_node_sz - return index node size.
++ * @c: the UBIFS file-system description object
++ * @child_cnt: number of children of this index node
++ *
++ * The size is %UBIFS_IDX_NODE_SZ plus, for each child, %UBIFS_BRANCH_SZ plus
++ * the key length @c->key_len.
++ */
++static inline int ubifs_idx_node_sz(const struct ubifs_info *c, int child_cnt)
++{
++	return UBIFS_IDX_NODE_SZ + (UBIFS_BRANCH_SZ + c->key_len) * child_cnt;
++}
++
++/**
++ * ubifs_idx_branch - return pointer to an index branch.
++ * @c: the UBIFS file-system description object
++ * @idx: index node
++ * @bnum: branch number
++ *
++ * Branches are laid out back to back starting at @idx->branches, each taking
++ * %UBIFS_BRANCH_SZ plus @c->key_len bytes.
++ */
++static inline
++struct ubifs_branch *ubifs_idx_branch(const struct ubifs_info *c,
++				      const struct ubifs_idx_node *idx,
++				      int bnum)
++{
++	return (struct ubifs_branch *)((void *)idx->branches +
++				       (UBIFS_BRANCH_SZ + c->key_len) * bnum);
++}
++
++/**
++ * ubifs_idx_key - return pointer to an index key.
++ * @c: the UBIFS file-system description object
++ * @idx: index node
++ *
++ * Returns the key of the first branch of @idx. @c is not used.
++ */
++static inline void *ubifs_idx_key(const struct ubifs_info *c,
++				  const struct ubifs_idx_node *idx)
++{
++	return (void *)((struct ubifs_branch *)idx->branches)->key;
++}
++
++/**
++ * ubifs_current_time - round current time to time granularity.
++ * @inode: inode
++ *
++ * Returns 'current_fs_time()' if the super block time granularity is below
++ * %NSEC_PER_SEC, and %CURRENT_TIME_SEC otherwise.
++ */
++static inline struct timespec ubifs_current_time(struct inode *inode)
++{
++	return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ?
++		current_fs_time(inode->i_sb) : CURRENT_TIME_SEC;
++}
++
++/**
++ * ubifs_tnc_lookup - look up a file-system node.
++ * @c: UBIFS file-system description object
++ * @key: node key to lookup
++ * @node: the node is returned here
++ *
++ * This function looks up and reads node with key @key. The caller has to make
++ * sure the @node buffer is large enough to fit the node. Returns zero in case
++ * of success, %-ENOENT if the node was not found, and a negative error code in
++ * case of failure.
++ */
++static inline int ubifs_tnc_lookup(struct ubifs_info *c,
++				   const union ubifs_key *key, void *node)
++{
++	/* Same as 'ubifs_tnc_locate()' with the last two arguments %NULL */
++	return ubifs_tnc_locate(c, key, node, NULL, NULL);
++}
++
++/**
++ * ubifs_get_lprops - get reference to LEB properties.
++ * @c: the UBIFS file-system description object
++ *
++ * This function locks lprops. Lprops have to be unlocked by
++ * 'ubifs_release_lprops()'.
++ */
++static inline void ubifs_get_lprops(struct ubifs_info *c)
++{
++	mutex_lock(&c->lp_mutex);
++}
++
++/**
++ * ubifs_release_lprops - release lprops lock.
++ * @c: the UBIFS file-system description object
++ *
++ * This function has to be called after each 'ubifs_get_lprops()' call to
++ * unlock lprops.
++ */
++static inline void ubifs_release_lprops(struct ubifs_info *c)
++{
++	/* Assert that the lock is held and the empty LEB count is in range */
++	ubifs_assert(mutex_is_locked(&c->lp_mutex));
++	ubifs_assert(c->lst.empty_lebs >= 0 &&
++		     c->lst.empty_lebs <= c->main_lebs);
++	mutex_unlock(&c->lp_mutex);
++}
++
++#endif /* __UBIFS_MISC_H__ */
+diff -Nurd linux-2.6.24/fs/ubifs/orphan.c ubifs-v2.6.24/fs/ubifs/orphan.c
+--- linux-2.6.24/fs/ubifs/orphan.c	1970-01-01 02:00:00.000000000 +0200
++++ ubifs-v2.6.24/fs/ubifs/orphan.c	2009-04-07 17:14:47.000000000 +0200
+@@ -0,0 +1,962 @@
++/*
++ * This file is part of UBIFS.
++ *
++ * Copyright (C) 2006-2008 Nokia Corporation.
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 as published by
++ * the Free Software Foundation.
++ * ++ * This program is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, write to the Free Software Foundation, Inc., 51 ++ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ * ++ * Author: Adrian Hunter ++ */ ++ ++#include "ubifs.h" ++ ++/* ++ * An orphan is an inode number whose inode node has been committed to the index ++ * with a link count of zero. That happens when an open file is deleted ++ * (unlinked) and then a commit is run. In the normal course of events the inode ++ * would be deleted when the file is closed. However in the case of an unclean ++ * unmount, orphans need to be accounted for. After an unclean unmount, the ++ * orphans' inodes must be deleted which means either scanning the entire index ++ * looking for them, or keeping a list on flash somewhere. This unit implements ++ * the latter approach. ++ * ++ * The orphan area is a fixed number of LEBs situated between the LPT area and ++ * the main area. The number of orphan area LEBs is specified when the file ++ * system is created. The minimum number is 1. The size of the orphan area ++ * should be so that it can hold the maximum number of orphans that are expected ++ * to ever exist at one time. ++ * ++ * The number of orphans that can fit in a LEB is: ++ * ++ * (c->leb_size - UBIFS_ORPH_NODE_SZ) / sizeof(__le64) ++ * ++ * For example: a 15872 byte LEB can fit 1980 orphans so 1 LEB may be enough. ++ * ++ * Orphans are accumulated in a rb-tree. When an inode's link count drops to ++ * zero, the inode number is added to the rb-tree. It is removed from the tree ++ * when the inode is deleted. 
Any new orphans that are in the orphan tree when
++ * the commit is run, are written to the orphan area in 1 or more orphan nodes.
++ * If the orphan area is full, it is consolidated to make space. There is
++ * always enough space because validation prevents the user from creating more
++ * than the maximum number of orphans allowed.
++ */
++
++#ifdef CONFIG_UBIFS_FS_DEBUG
++static int dbg_check_orphans(struct ubifs_info *c);
++#else
++#define dbg_check_orphans(c) 0
++#endif
++
++/**
++ * ubifs_add_orphan - add an orphan.
++ * @c: UBIFS file-system description object
++ * @inum: orphan inode number
++ *
++ * Add an orphan. This function is called when an inode's link count drops to
++ * zero. Returns %0 on success (also when @inum is already in the tree),
++ * %-ENOMEM if the orphan object cannot be allocated, and %-ENFILE if the
++ * maximum number of orphans has been reached.
++ */
++int ubifs_add_orphan(struct ubifs_info *c, ino_t inum)
++{
++	struct ubifs_orphan *orphan, *o;
++	struct rb_node **p, *parent = NULL;
++
++	/* Allocate before taking the spinlock */
++	orphan = kzalloc(sizeof(struct ubifs_orphan), GFP_NOFS);
++	if (!orphan)
++		return -ENOMEM;
++	orphan->inum = inum;
++	orphan->new = 1;
++
++	spin_lock(&c->orphan_lock);
++	if (c->tot_orphans >= c->max_orphans) {
++		spin_unlock(&c->orphan_lock);
++		kfree(orphan);
++		return -ENFILE;
++	}
++	/* Find the insertion point in the tree, which is keyed by inode number */
++	p = &c->orph_tree.rb_node;
++	while (*p) {
++		parent = *p;
++		o = rb_entry(parent, struct ubifs_orphan, rb);
++		if (inum < o->inum)
++			p = &(*p)->rb_left;
++		else if (inum > o->inum)
++			p = &(*p)->rb_right;
++		else {
++			/* Already present: drop the new object, not an error */
++			dbg_err("orphaned twice");
++			spin_unlock(&c->orphan_lock);
++			kfree(orphan);
++			return 0;
++		}
++	}
++	c->tot_orphans += 1;
++	c->new_orphans += 1;
++	rb_link_node(&orphan->rb, parent, p);
++	rb_insert_color(&orphan->rb, &c->orph_tree);
++	/* The orphan goes on the list of all orphans and the list of new ones */
++	list_add_tail(&orphan->list, &c->orph_list);
++	list_add_tail(&orphan->new_list, &c->orph_new);
++	spin_unlock(&c->orphan_lock);
++	dbg_gen("ino %lu", (unsigned long)inum);
++	return 0;
++}
++
++/**
++ * ubifs_delete_orphan - delete an orphan.
++ * @c: UBIFS file-system description object
++ * @inum: orphan inode number
++ *
++ * Delete an orphan.
This function is called when an inode is deleted. ++ */ ++void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum) ++{ ++ struct ubifs_orphan *o; ++ struct rb_node *p; ++ ++ spin_lock(&c->orphan_lock); ++ p = c->orph_tree.rb_node; ++ while (p) { ++ o = rb_entry(p, struct ubifs_orphan, rb); ++ if (inum < o->inum) ++ p = p->rb_left; ++ else if (inum > o->inum) ++ p = p->rb_right; ++ else { ++ if (o->dnext) { ++ spin_unlock(&c->orphan_lock); ++ dbg_gen("deleted twice ino %lu", ++ (unsigned long)inum); ++ return; ++ } ++ if (o->cnext) { ++ o->dnext = c->orph_dnext; ++ c->orph_dnext = o; ++ spin_unlock(&c->orphan_lock); ++ dbg_gen("delete later ino %lu", ++ (unsigned long)inum); ++ return; ++ } ++ rb_erase(p, &c->orph_tree); ++ list_del(&o->list); ++ c->tot_orphans -= 1; ++ if (o->new) { ++ list_del(&o->new_list); ++ c->new_orphans -= 1; ++ } ++ spin_unlock(&c->orphan_lock); ++ kfree(o); ++ dbg_gen("inum %lu", (unsigned long)inum); ++ return; ++ } ++ } ++ spin_unlock(&c->orphan_lock); ++ dbg_err("missing orphan ino %lu", (unsigned long)inum); ++ dbg_dump_stack(); ++} ++ ++/** ++ * ubifs_orphan_start_commit - start commit of orphans. ++ * @c: UBIFS file-system description object ++ * ++ * Start commit of orphans. Under @c->orphan_lock, every orphan on the new list is chained onto the cnext (commit) list with its new flag cleared, the count is moved from @c->new_orphans to @c->cmt_orphans, the new list is re-initialised, and @c->no_orphs is set when there are no orphans at all. Always returns %0. ++ */ ++int ubifs_orphan_start_commit(struct ubifs_info *c) ++{ ++ struct ubifs_orphan *orphan, **last; ++ ++ spin_lock(&c->orphan_lock); ++ last = &c->orph_cnext; ++ list_for_each_entry(orphan, &c->orph_new, new_list) { ++ ubifs_assert(orphan->new); ++ orphan->new = 0; ++ *last = orphan; ++ last = &orphan->cnext; ++ } ++ *last = orphan->cnext; /* NOTE(review): once list_for_each_entry() has run to completion, orphan is computed from the list head rather than a real entry, so this read looks suspect; ubifs_delete_orphan() treats a non-NULL cnext as still being committed, so confirm the intended list terminator before changing it */ ++ c->cmt_orphans = c->new_orphans; ++ c->new_orphans = 0; ++ dbg_cmt("%d orphans to commit", c->cmt_orphans); ++ INIT_LIST_HEAD(&c->orph_new); ++ if (c->tot_orphans == 0) ++ c->no_orphs = 1; ++ else ++ c->no_orphs = 0; ++ spin_unlock(&c->orphan_lock); ++ return 0; ++} ++ ++/** ++ * avail_orphs - calculate available space.
++ * @c: UBIFS file-system description object ++ * ++ * This function returns the number of orphans that can be written in the ++ * available space. ++ */ ++static int avail_orphs(struct ubifs_info *c) ++{ ++ int avail_lebs, avail, gap; ++ ++ avail_lebs = c->orph_lebs - (c->ohead_lnum - c->orph_first) - 1; ++ avail = avail_lebs * ++ ((c->leb_size - UBIFS_ORPH_NODE_SZ) / sizeof(__le64)); ++ gap = c->leb_size - c->ohead_offs; ++ if (gap >= UBIFS_ORPH_NODE_SZ + sizeof(__le64)) ++ avail += (gap - UBIFS_ORPH_NODE_SZ) / sizeof(__le64); ++ return avail; ++} ++ ++/** ++ * tot_avail_orphs - calculate total space. ++ * @c: UBIFS file-system description object ++ * ++ * This function returns the number of orphans that can be written in half ++ * the total space. That leaves half the space for adding new orphans. ++ */ ++static int tot_avail_orphs(struct ubifs_info *c) ++{ ++ int avail_lebs, avail; ++ ++ avail_lebs = c->orph_lebs; ++ avail = avail_lebs * ++ ((c->leb_size - UBIFS_ORPH_NODE_SZ) / sizeof(__le64)); ++ return avail / 2; ++} ++ ++/** ++ * do_write_orph_node - write a node to the orphan head. ++ * @c: UBIFS file-system description object ++ * @len: length of node ++ * @atomic: write atomically ++ * ++ * This function writes a node to the orphan head from the orphan buffer. If ++ * %atomic is not zero, then the write is done atomically. On success, %0 is ++ * returned, otherwise a negative error code is returned. 
++ */ ++static int do_write_orph_node(struct ubifs_info *c, int len, int atomic) ++{ ++ int err = 0; ++ ++ if (atomic) { ++ ubifs_assert(c->ohead_offs == 0); ++ ubifs_prepare_node(c, c->orph_buf, len, 1); ++ len = ALIGN(len, c->min_io_size); ++ err = ubifs_leb_change(c, c->ohead_lnum, c->orph_buf, len, ++ UBI_SHORTTERM); ++ } else { ++ if (c->ohead_offs == 0) { ++ /* Ensure LEB has been unmapped */ ++ err = ubifs_leb_unmap(c, c->ohead_lnum); ++ if (err) ++ return err; ++ } ++ err = ubifs_write_node(c, c->orph_buf, len, c->ohead_lnum, ++ c->ohead_offs, UBI_SHORTTERM); ++ } ++ return err; ++} ++ ++/** ++ * write_orph_node - write an orphan node. ++ * @c: UBIFS file-system description object ++ * @atomic: write atomically ++ * ++ * This function builds an orphan node from the cnext list and writes it to the ++ * orphan head. On success, %0 is returned, otherwise a negative error code ++ * is returned. ++ */ ++static int write_orph_node(struct ubifs_info *c, int atomic) ++{ ++ struct ubifs_orphan *orphan, *cnext; ++ struct ubifs_orph_node *orph; ++ int gap, err, len, cnt, i; ++ ++ ubifs_assert(c->cmt_orphans > 0); ++ gap = c->leb_size - c->ohead_offs; ++ if (gap < UBIFS_ORPH_NODE_SZ + sizeof(__le64)) { ++ c->ohead_lnum += 1; ++ c->ohead_offs = 0; ++ gap = c->leb_size; ++ if (c->ohead_lnum > c->orph_last) { ++ /* ++ * We limit the number of orphans so that this should ++ * never happen. 
++ */ ++ ubifs_err("out of space in orphan area"); ++ return -EINVAL; ++ } ++ } ++ cnt = (gap - UBIFS_ORPH_NODE_SZ) / sizeof(__le64); ++ if (cnt > c->cmt_orphans) ++ cnt = c->cmt_orphans; ++ len = UBIFS_ORPH_NODE_SZ + cnt * sizeof(__le64); ++ ubifs_assert(c->orph_buf); ++ orph = c->orph_buf; ++ orph->ch.node_type = UBIFS_ORPH_NODE; ++ spin_lock(&c->orphan_lock); ++ cnext = c->orph_cnext; ++ for (i = 0; i < cnt; i++) { ++ orphan = cnext; ++ orph->inos[i] = cpu_to_le64(orphan->inum); ++ cnext = orphan->cnext; ++ orphan->cnext = NULL; ++ } ++ c->orph_cnext = cnext; ++ c->cmt_orphans -= cnt; ++ spin_unlock(&c->orphan_lock); ++ if (c->cmt_orphans) ++ orph->cmt_no = cpu_to_le64(c->cmt_no); ++ else ++ /* Mark the last node of the commit */ ++ orph->cmt_no = cpu_to_le64((c->cmt_no) | (1ULL << 63)); ++ ubifs_assert(c->ohead_offs + len <= c->leb_size); ++ ubifs_assert(c->ohead_lnum >= c->orph_first); ++ ubifs_assert(c->ohead_lnum <= c->orph_last); ++ err = do_write_orph_node(c, len, atomic); ++ c->ohead_offs += ALIGN(len, c->min_io_size); ++ c->ohead_offs = ALIGN(c->ohead_offs, 8); ++ return err; ++} ++ ++/** ++ * write_orph_nodes - write orphan nodes until there are no more to commit. ++ * @c: UBIFS file-system description object ++ * @atomic: write atomically ++ * ++ * This function writes orphan nodes for all the orphans to commit. On success, ++ * %0 is returned, otherwise a negative error code is returned. ++ */ ++static int write_orph_nodes(struct ubifs_info *c, int atomic) ++{ ++ int err; ++ ++ while (c->cmt_orphans > 0) { ++ err = write_orph_node(c, atomic); ++ if (err) ++ return err; ++ } ++ if (atomic) { ++ int lnum; ++ ++ /* Unmap any unused LEBs after consolidation */ ++ lnum = c->ohead_lnum + 1; ++ for (lnum = c->ohead_lnum + 1; lnum <= c->orph_last; lnum++) { ++ err = ubifs_leb_unmap(c, lnum); ++ if (err) ++ return err; ++ } ++ } ++ return 0; ++} ++ ++/** ++ * consolidate - consolidate the orphan area. 
++ * @c: UBIFS file-system description object ++ * ++ * This function enables consolidation by putting all the orphans into the list ++ * to commit. The list is in the order that the orphans were added, and the ++ * LEBs are written atomically in order, so at no time can orphans be lost by ++ * an unclean unmount. ++ * ++ * This function returns %0 on success and a negative error code on failure. The only failure reported here is %-EINVAL, when the non-new orphans exceed the value returned by tot_avail_orphs(); on success the orphan head is reset to the start of the first orphan LEB. ++ */ ++static int consolidate(struct ubifs_info *c) ++{ ++ int tot_avail = tot_avail_orphs(c), err = 0; ++ ++ spin_lock(&c->orphan_lock); ++ dbg_cmt("there is space for %d orphans and there are %d", ++ tot_avail, c->tot_orphans); ++ if (c->tot_orphans - c->new_orphans <= tot_avail) { ++ struct ubifs_orphan *orphan, **last; ++ int cnt = 0; ++ ++ /* Change the cnext list to include all non-new orphans */ ++ last = &c->orph_cnext; ++ list_for_each_entry(orphan, &c->orph_list, list) { ++ if (orphan->new) ++ continue; ++ *last = orphan; ++ last = &orphan->cnext; ++ cnt += 1; ++ } ++ *last = orphan->cnext; /* NOTE(review): once list_for_each_entry() has run to completion, orphan is computed from the list head rather than a real entry, so this read looks suspect; other code in this file tests cnext for non-NULL to detect orphans being committed, so confirm the intended list terminator before changing it */ ++ ubifs_assert(cnt == c->tot_orphans - c->new_orphans); ++ c->cmt_orphans = cnt; ++ c->ohead_lnum = c->orph_first; ++ c->ohead_offs = 0; ++ } else { ++ /* ++ * We limit the number of orphans so that this should ++ * never happen. ++ */ ++ ubifs_err("out of space in orphan area"); ++ err = -EINVAL; ++ } ++ spin_unlock(&c->orphan_lock); ++ return err; ++} ++ ++/** ++ * commit_orphans - commit orphans. ++ * @c: UBIFS file-system description object ++ * ++ * This function commits orphans to flash. On success, %0 is returned, ++ * otherwise a negative error code is returned.
++ */ ++static int commit_orphans(struct ubifs_info *c) ++{ ++ int avail, atomic = 0, err; ++ ++ ubifs_assert(c->cmt_orphans > 0); ++ avail = avail_orphs(c); ++ if (avail < c->cmt_orphans) { ++ /* Not enough space to write new orphans, so consolidate */ ++ err = consolidate(c); ++ if (err) ++ return err; ++ atomic = 1; ++ } ++ err = write_orph_nodes(c, atomic); ++ return err; ++} ++ ++/** ++ * erase_deleted - erase the orphans marked for deletion. ++ * @c: UBIFS file-system description object ++ * ++ * During commit, the orphans being committed cannot be deleted, so they are ++ * marked for deletion and deleted by this function. Also, the recovery ++ * adds killed orphans to the deletion list, and therefore they are deleted ++ * here too. ++ */ ++static void erase_deleted(struct ubifs_info *c) ++{ ++ struct ubifs_orphan *orphan, *dnext; ++ ++ spin_lock(&c->orphan_lock); ++ dnext = c->orph_dnext; ++ while (dnext) { ++ orphan = dnext; ++ dnext = orphan->dnext; ++ ubifs_assert(!orphan->new); ++ rb_erase(&orphan->rb, &c->orph_tree); ++ list_del(&orphan->list); ++ c->tot_orphans -= 1; ++ dbg_gen("deleting orphan ino %lu", (unsigned long)orphan->inum); ++ kfree(orphan); ++ } ++ c->orph_dnext = NULL; ++ spin_unlock(&c->orphan_lock); ++} ++ ++/** ++ * ubifs_orphan_end_commit - end commit of orphans. ++ * @c: UBIFS file-system description object ++ * ++ * End commit of orphans. ++ */ ++int ubifs_orphan_end_commit(struct ubifs_info *c) ++{ ++ int err; ++ ++ if (c->cmt_orphans != 0) { ++ err = commit_orphans(c); ++ if (err) ++ return err; ++ } ++ erase_deleted(c); ++ err = dbg_check_orphans(c); ++ return err; ++} ++ ++/** ++ * ubifs_clear_orphans - erase all LEBs used for orphans. ++ * @c: UBIFS file-system description object ++ * ++ * If recovery is not required, then the orphans from the previous session ++ * are not needed. This function locates the LEBs used to record ++ * orphans, and un-maps them. 
++ */ ++int ubifs_clear_orphans(struct ubifs_info *c) ++{ ++ int lnum, err; ++ ++ for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) { ++ err = ubifs_leb_unmap(c, lnum); ++ if (err) ++ return err; ++ } ++ c->ohead_lnum = c->orph_first; ++ c->ohead_offs = 0; ++ return 0; ++} ++ ++/** ++ * insert_dead_orphan - insert an orphan. ++ * @c: UBIFS file-system description object ++ * @inum: orphan inode number ++ * ++ * This function is a helper to the 'do_kill_orphans()' function. The orphan ++ * must be kept until the next commit, so it is added to the rb-tree and the ++ * deletion list. ++ */ ++static int insert_dead_orphan(struct ubifs_info *c, ino_t inum) ++{ ++ struct ubifs_orphan *orphan, *o; ++ struct rb_node **p, *parent = NULL; ++ ++ orphan = kzalloc(sizeof(struct ubifs_orphan), GFP_KERNEL); ++ if (!orphan) ++ return -ENOMEM; ++ orphan->inum = inum; ++ ++ p = &c->orph_tree.rb_node; ++ while (*p) { ++ parent = *p; ++ o = rb_entry(parent, struct ubifs_orphan, rb); ++ if (inum < o->inum) ++ p = &(*p)->rb_left; ++ else if (inum > o->inum) ++ p = &(*p)->rb_right; ++ else { ++ /* Already added - no problem */ ++ kfree(orphan); ++ return 0; ++ } ++ } ++ c->tot_orphans += 1; ++ rb_link_node(&orphan->rb, parent, p); ++ rb_insert_color(&orphan->rb, &c->orph_tree); ++ list_add_tail(&orphan->list, &c->orph_list); ++ orphan->dnext = c->orph_dnext; ++ c->orph_dnext = orphan; ++ dbg_mnt("ino %lu, new %d, tot %d", (unsigned long)inum, ++ c->new_orphans, c->tot_orphans); ++ return 0; ++} ++ ++/** ++ * do_kill_orphans - remove orphan inodes from the index. ++ * @c: UBIFS file-system description object ++ * @sleb: scanned LEB ++ * @last_cmt_no: cmt_no of last orphan node read is passed and returned here ++ * @outofdate: whether the LEB is out of date is returned here ++ * @last_flagged: whether the end orphan node is encountered ++ * ++ * This function is a helper to the 'kill_orphans()' function. 
It goes through ++ * every orphan node in a LEB and for every inode number recorded, removes ++ * all keys for that inode from the TNC. ++ */ ++static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb, ++ unsigned long long *last_cmt_no, int *outofdate, ++ int *last_flagged) ++{ ++ struct ubifs_scan_node *snod; ++ struct ubifs_orph_node *orph; ++ unsigned long long cmt_no; ++ ino_t inum; ++ int i, n, err, first = 1; ++ ++ list_for_each_entry(snod, &sleb->nodes, list) { ++ if (snod->type != UBIFS_ORPH_NODE) { ++ ubifs_err("invalid node type %d in orphan area at " ++ "%d:%d", snod->type, sleb->lnum, snod->offs); ++ dbg_dump_node(c, snod->node); ++ return -EINVAL; ++ } ++ ++ orph = snod->node; ++ ++ /* Check commit number */ ++ cmt_no = le64_to_cpu(orph->cmt_no) & LLONG_MAX; ++ /* ++ * The commit number on the master node may be less, because ++ * of a failed commit. If there are several failed commits in a ++ * row, the commit number written on orphan nodes will continue ++ * to increase (because the commit number is adjusted here) even ++ * though the commit number on the master node stays the same ++ * because the master node has not been re-written. ++ */ ++ if (cmt_no > c->cmt_no) ++ c->cmt_no = cmt_no; ++ if (cmt_no < *last_cmt_no && *last_flagged) { ++ /* ++ * The last orphan node had a higher commit number and ++ * was flagged as the last written for that commit ++ * number. That makes this orphan node, out of date. 
++ */ ++ if (!first) { ++ ubifs_err("out of order commit number %llu in " ++ "orphan node at %d:%d", ++ cmt_no, sleb->lnum, snod->offs); ++ dbg_dump_node(c, snod->node); ++ return -EINVAL; ++ } ++ dbg_rcvry("out of date LEB %d", sleb->lnum); ++ *outofdate = 1; ++ return 0; ++ } ++ ++ if (first) ++ first = 0; ++ ++ n = (le32_to_cpu(orph->ch.len) - UBIFS_ORPH_NODE_SZ) >> 3; ++ for (i = 0; i < n; i++) { ++ inum = le64_to_cpu(orph->inos[i]); ++ dbg_rcvry("deleting orphaned inode %lu", ++ (unsigned long)inum); ++ err = ubifs_tnc_remove_ino(c, inum); ++ if (err) ++ return err; ++ err = insert_dead_orphan(c, inum); ++ if (err) ++ return err; ++ } ++ ++ *last_cmt_no = cmt_no; ++ if (le64_to_cpu(orph->cmt_no) & (1ULL << 63)) { ++ dbg_rcvry("last orph node for commit %llu at %d:%d", ++ cmt_no, sleb->lnum, snod->offs); ++ *last_flagged = 1; ++ } else ++ *last_flagged = 0; ++ } ++ ++ return 0; ++} ++ ++/** ++ * kill_orphans - remove all orphan inodes from the index. ++ * @c: UBIFS file-system description object ++ * ++ * If recovery is required, then orphan inodes recorded during the previous ++ * session (which ended with an unclean unmount) must be deleted from the index. ++ * This is done by updating the TNC, but since the index is not updated until ++ * the next commit, the LEBs where the orphan information is recorded are not ++ * erased until the next commit. ++ */ ++static int kill_orphans(struct ubifs_info *c) ++{ ++ unsigned long long last_cmt_no = 0; ++ int lnum, err = 0, outofdate = 0, last_flagged = 0; ++ ++ c->ohead_lnum = c->orph_first; ++ c->ohead_offs = 0; ++ /* Check no-orphans flag and skip this if no orphans */ ++ if (c->no_orphs) { ++ dbg_rcvry("no orphans"); ++ return 0; ++ } ++ /* ++ * Orph nodes always start at c->orph_first and are written to each ++ * successive LEB in turn. Generally unused LEBs will have been unmapped ++ * but may contain out of date orphan nodes if the unmap didn't go ++ * through. 
In addition, the last orphan node written for each commit is ++ * marked (top bit of orph->cmt_no is set to 1). It is possible that ++ * there are orphan nodes from the next commit (i.e. the commit did not ++ * complete successfully). In that case, no orphans will have been lost ++ * due to the way that orphans are written, and any orphans added will ++ * be valid orphans anyway and so can be deleted. ++ */ ++ for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) { ++ struct ubifs_scan_leb *sleb; ++ ++ dbg_rcvry("LEB %d", lnum); ++ sleb = ubifs_scan(c, lnum, 0, c->sbuf); ++ if (IS_ERR(sleb)) { ++ sleb = ubifs_recover_leb(c, lnum, 0, c->sbuf, 0); ++ if (IS_ERR(sleb)) { ++ err = PTR_ERR(sleb); ++ break; ++ } ++ } ++ err = do_kill_orphans(c, sleb, &last_cmt_no, &outofdate, ++ &last_flagged); ++ if (err || outofdate) { ++ ubifs_scan_destroy(sleb); ++ break; ++ } ++ if (sleb->endpt) { ++ c->ohead_lnum = lnum; ++ c->ohead_offs = sleb->endpt; ++ } ++ ubifs_scan_destroy(sleb); ++ } ++ return err; ++} ++ ++/** ++ * ubifs_mount_orphans - delete orphan inodes and erase LEBs that recorded them. ++ * @c: UBIFS file-system description object ++ * @unclean: indicates recovery from unclean unmount ++ * @read_only: indicates read only mount ++ * ++ * This function is called when mounting to erase orphans from the previous ++ * session. If UBIFS was not unmounted cleanly, then the inodes recorded as ++ * orphans are deleted. 
++ */ ++int ubifs_mount_orphans(struct ubifs_info *c, int unclean, int read_only) ++{ ++ int err = 0; ++ ++ c->max_orphans = tot_avail_orphs(c); ++ ++ if (!read_only) { ++ c->orph_buf = vmalloc(c->leb_size); ++ if (!c->orph_buf) ++ return -ENOMEM; ++ } ++ ++ if (unclean) ++ err = kill_orphans(c); ++ else if (!read_only) ++ err = ubifs_clear_orphans(c); ++ ++ return err; ++} ++ ++#ifdef CONFIG_UBIFS_FS_DEBUG ++ ++struct check_orphan { ++ struct rb_node rb; ++ ino_t inum; ++}; ++ ++struct check_info { ++ unsigned long last_ino; ++ unsigned long tot_inos; ++ unsigned long missing; ++ unsigned long long leaf_cnt; ++ struct ubifs_ino_node *node; ++ struct rb_root root; ++}; ++ ++static int dbg_find_orphan(struct ubifs_info *c, ino_t inum) ++{ ++ struct ubifs_orphan *o; ++ struct rb_node *p; ++ ++ spin_lock(&c->orphan_lock); ++ p = c->orph_tree.rb_node; ++ while (p) { ++ o = rb_entry(p, struct ubifs_orphan, rb); ++ if (inum < o->inum) ++ p = p->rb_left; ++ else if (inum > o->inum) ++ p = p->rb_right; ++ else { ++ spin_unlock(&c->orphan_lock); ++ return 1; ++ } ++ } ++ spin_unlock(&c->orphan_lock); ++ return 0; ++} ++ ++static int dbg_ins_check_orphan(struct rb_root *root, ino_t inum) ++{ ++ struct check_orphan *orphan, *o; ++ struct rb_node **p, *parent = NULL; ++ ++ orphan = kzalloc(sizeof(struct check_orphan), GFP_NOFS); ++ if (!orphan) ++ return -ENOMEM; ++ orphan->inum = inum; ++ ++ p = &root->rb_node; ++ while (*p) { ++ parent = *p; ++ o = rb_entry(parent, struct check_orphan, rb); ++ if (inum < o->inum) ++ p = &(*p)->rb_left; ++ else if (inum > o->inum) ++ p = &(*p)->rb_right; ++ else { ++ kfree(orphan); ++ return 0; ++ } ++ } ++ rb_link_node(&orphan->rb, parent, p); ++ rb_insert_color(&orphan->rb, root); ++ return 0; ++} ++ ++static int dbg_find_check_orphan(struct rb_root *root, ino_t inum) ++{ ++ struct check_orphan *o; ++ struct rb_node *p; ++ ++ p = root->rb_node; ++ while (p) { ++ o = rb_entry(p, struct check_orphan, rb); ++ if (inum < o->inum) ++ p = 
p->rb_left; ++ else if (inum > o->inum) ++ p = p->rb_right; ++ else ++ return 1; ++ } ++ return 0; ++} ++ ++static void dbg_free_check_tree(struct rb_root *root) ++{ ++ struct rb_node *this = root->rb_node; ++ struct check_orphan *o; ++ ++ while (this) { ++ if (this->rb_left) { ++ this = this->rb_left; ++ continue; ++ } else if (this->rb_right) { ++ this = this->rb_right; ++ continue; ++ } ++ o = rb_entry(this, struct check_orphan, rb); ++ this = rb_parent(this); ++ if (this) { ++ if (this->rb_left == &o->rb) ++ this->rb_left = NULL; ++ else ++ this->rb_right = NULL; ++ } ++ kfree(o); ++ } ++} ++ ++static int dbg_orphan_check(struct ubifs_info *c, struct ubifs_zbranch *zbr, ++ void *priv) ++{ ++ struct check_info *ci = priv; ++ ino_t inum; ++ int err; ++ ++ inum = key_inum(c, &zbr->key); ++ if (inum != ci->last_ino) { ++ /* Lowest node type is the inode node, so it comes first */ ++ if (key_type(c, &zbr->key) != UBIFS_INO_KEY) ++ ubifs_err("found orphan node ino %lu, type %d", ++ (unsigned long)inum, key_type(c, &zbr->key)); ++ ci->last_ino = inum; ++ ci->tot_inos += 1; ++ err = ubifs_tnc_read_node(c, zbr, ci->node); ++ if (err) { ++ ubifs_err("node read failed, error %d", err); ++ return err; ++ } ++ if (ci->node->nlink == 0) ++ /* Must be recorded as an orphan */ ++ if (!dbg_find_check_orphan(&ci->root, inum) && ++ !dbg_find_orphan(c, inum)) { ++ ubifs_err("missing orphan, ino %lu", ++ (unsigned long)inum); ++ ci->missing += 1; ++ } ++ } ++ ci->leaf_cnt += 1; ++ return 0; ++} ++ ++static int dbg_read_orphans(struct check_info *ci, struct ubifs_scan_leb *sleb) ++{ ++ struct ubifs_scan_node *snod; ++ struct ubifs_orph_node *orph; ++ ino_t inum; ++ int i, n, err; ++ ++ list_for_each_entry(snod, &sleb->nodes, list) { ++ cond_resched(); ++ if (snod->type != UBIFS_ORPH_NODE) ++ continue; ++ orph = snod->node; ++ n = (le32_to_cpu(orph->ch.len) - UBIFS_ORPH_NODE_SZ) >> 3; ++ for (i = 0; i < n; i++) { ++ inum = le64_to_cpu(orph->inos[i]); ++ err = 
dbg_ins_check_orphan(&ci->root, inum); ++ if (err) ++ return err; ++ } ++ } ++ return 0; ++} ++ ++static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci) ++{ ++ int lnum, err = 0; ++ ++ /* Check no-orphans flag and skip this if no orphans */ ++ if (c->no_orphs) ++ return 0; ++ ++ for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) { ++ struct ubifs_scan_leb *sleb; ++ ++ sleb = ubifs_scan(c, lnum, 0, c->dbg->buf); ++ if (IS_ERR(sleb)) { ++ err = PTR_ERR(sleb); ++ break; ++ } ++ ++ err = dbg_read_orphans(ci, sleb); ++ ubifs_scan_destroy(sleb); ++ if (err) ++ break; ++ } ++ ++ return err; ++} ++ ++static int dbg_check_orphans(struct ubifs_info *c) ++{ ++ struct check_info ci; ++ int err; ++ ++ if (!(ubifs_chk_flags & UBIFS_CHK_ORPH)) ++ return 0; ++ ++ ci.last_ino = 0; ++ ci.tot_inos = 0; ++ ci.missing = 0; ++ ci.leaf_cnt = 0; ++ ci.root = RB_ROOT; ++ ci.node = kmalloc(UBIFS_MAX_INO_NODE_SZ, GFP_NOFS); ++ if (!ci.node) { ++ ubifs_err("out of memory"); ++ return -ENOMEM; ++ } ++ ++ err = dbg_scan_orphans(c, &ci); ++ if (err) ++ goto out; ++ ++ err = dbg_walk_index(c, &dbg_orphan_check, NULL, &ci); ++ if (err) { ++ ubifs_err("cannot scan TNC, error %d", err); ++ goto out; ++ } ++ ++ if (ci.missing) { ++ ubifs_err("%lu missing orphan(s)", ci.missing); ++ err = -EINVAL; ++ goto out; ++ } ++ ++ dbg_cmt("last inode number is %lu", ci.last_ino); ++ dbg_cmt("total number of inodes is %lu", ci.tot_inos); ++ dbg_cmt("total number of leaf nodes is %llu", ci.leaf_cnt); ++ ++out: ++ dbg_free_check_tree(&ci.root); ++ kfree(ci.node); ++ return err; ++} ++ ++#endif /* CONFIG_UBIFS_FS_DEBUG */ +diff -Nurd linux-2.6.24/fs/ubifs/recovery.c ubifs-v2.6.24/fs/ubifs/recovery.c +--- linux-2.6.24/fs/ubifs/recovery.c 1970-01-01 02:00:00.000000000 +0200 ++++ ubifs-v2.6.24/fs/ubifs/recovery.c 2009-04-07 17:14:47.000000000 +0200 +@@ -0,0 +1,1496 @@ ++/* ++ * This file is part of UBIFS. 
++ * ++ * Copyright (C) 2006-2008 Nokia Corporation ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published by ++ * the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, write to the Free Software Foundation, Inc., 51 ++ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ * ++ * Authors: Adrian Hunter ++ * Artem Bityutskiy (Битюцкий Артём) ++ */ ++ ++/* ++ * This file implements functions needed to recover from unclean un-mounts. ++ * When UBIFS is mounted, it checks a flag on the master node to determine if ++ * an un-mount was completed successfully. If not, the process of mounting ++ * incorporates additional checking and fixing of on-flash data structures. ++ * UBIFS always cleans away all remnants of an unclean un-mount, so that ++ * errors do not accumulate. However UBIFS defers recovery if it is mounted ++ * read-only, and the flash is not modified in that case. ++ */ ++ ++#include <linux/crc32.h> ++#include "ubifs.h" ++ ++/** ++ * is_empty - determine whether a buffer is empty (contains all 0xff). ++ * @buf: buffer to check ++ * @len: length of buffer ++ * ++ * This function returns %1 if the buffer is empty (contains all 0xff) otherwise ++ * %0 is returned. A non-positive @len is reported as empty because no bytes are examined. ++ */ ++static int is_empty(void *buf, int len) ++{ ++ uint8_t *p = buf; ++ int i; ++ ++ for (i = 0; i < len; i++) ++ if (*p++ != 0xff) ++ return 0; ++ return 1; ++} ++ ++/** ++ * get_master_node - get the last valid master node allowing for corruption.
++ * @c: UBIFS file-system description object ++ * @lnum: LEB number ++ * @pbuf: buffer containing the LEB read, is returned here ++ * @mst: master node, if found, is returned here ++ * @cor: corruption, if found, is returned here ++ * ++ * This function allocates a buffer, reads the LEB into it, and finds and ++ * returns the last valid master node allowing for one area of corruption. ++ * The corrupt area, if there is one, must be consistent with the assumption ++ * that it is the result of an unclean unmount while the master node was being ++ * written. Under those circumstances, it is valid to use the previously written ++ * master node. ++ * ++ * This function returns %0 on success and a negative error code on failure. ++ */ ++static int get_master_node(const struct ubifs_info *c, int lnum, void **pbuf, ++ struct ubifs_mst_node **mst, void **cor) ++{ ++ const int sz = c->mst_node_alsz; ++ int err, offs, len; ++ void *sbuf, *buf; ++ ++ sbuf = vmalloc(c->leb_size); ++ if (!sbuf) ++ return -ENOMEM; ++ ++ err = ubi_read(c->ubi, lnum, sbuf, 0, c->leb_size); ++ if (err && err != -EBADMSG) ++ goto out_free; ++ ++ /* Find the first position that is definitely not a node */ ++ offs = 0; ++ buf = sbuf; ++ len = c->leb_size; ++ while (offs + UBIFS_MST_NODE_SZ <= c->leb_size) { ++ struct ubifs_ch *ch = buf; ++ ++ if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC) ++ break; ++ offs += sz; ++ buf += sz; ++ len -= sz; ++ } ++ /* See if there was a valid master node before that */ ++ if (offs) { ++ int ret; ++ ++ offs -= sz; ++ buf -= sz; ++ len += sz; ++ ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1); ++ if (ret != SCANNED_A_NODE && offs) { ++ /* Could have been corruption so check one place back */ ++ offs -= sz; ++ buf -= sz; ++ len += sz; ++ ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1); ++ if (ret != SCANNED_A_NODE) ++ /* ++ * We accept only one area of corruption because ++ * we are assuming that it was caused while ++ * trying to write a master node. 
++ */ ++ goto out_err; ++ } ++ if (ret == SCANNED_A_NODE) { ++ struct ubifs_ch *ch = buf; ++ ++ if (ch->node_type != UBIFS_MST_NODE) ++ goto out_err; ++ dbg_rcvry("found a master node at %d:%d", lnum, offs); ++ *mst = buf; ++ offs += sz; ++ buf += sz; ++ len -= sz; ++ } ++ } ++ /* Check for corruption */ ++ if (offs < c->leb_size) { ++ if (!is_empty(buf, min_t(int, len, sz))) { ++ *cor = buf; ++ dbg_rcvry("found corruption at %d:%d", lnum, offs); ++ } ++ offs += sz; ++ buf += sz; ++ len -= sz; ++ } ++ /* Check remaining empty space */ ++ if (offs < c->leb_size) ++ if (!is_empty(buf, len)) ++ goto out_err; ++ *pbuf = sbuf; ++ return 0; ++ ++out_err: ++ err = -EINVAL; ++out_free: ++ vfree(sbuf); ++ *mst = NULL; ++ *cor = NULL; ++ return err; ++} ++ ++/** ++ * write_rcvrd_mst_node - write recovered master node. ++ * @c: UBIFS file-system description object ++ * @mst: master node ++ * ++ * This function returns %0 on success and a negative error code on failure. ++ */ ++static int write_rcvrd_mst_node(struct ubifs_info *c, ++ struct ubifs_mst_node *mst) ++{ ++ int err = 0, lnum = UBIFS_MST_LNUM, sz = c->mst_node_alsz; ++ __le32 save_flags; ++ ++ dbg_rcvry("recovery"); ++ ++ save_flags = mst->flags; ++ mst->flags |= cpu_to_le32(UBIFS_MST_RCVRY); ++ ++ ubifs_prepare_node(c, mst, UBIFS_MST_NODE_SZ, 1); ++ err = ubi_leb_change(c->ubi, lnum, mst, sz, UBI_SHORTTERM); ++ if (err) ++ goto out; ++ err = ubi_leb_change(c->ubi, lnum + 1, mst, sz, UBI_SHORTTERM); ++ if (err) ++ goto out; ++out: ++ mst->flags = save_flags; ++ return err; ++} ++ ++/** ++ * ubifs_recover_master_node - recover the master node. ++ * @c: UBIFS file-system description object ++ * ++ * This function recovers the master node from corruption that may occur due to ++ * an unclean unmount. ++ * ++ * This function returns %0 on success and a negative error code on failure. 
++ */ ++int ubifs_recover_master_node(struct ubifs_info *c) ++{ ++ void *buf1 = NULL, *buf2 = NULL, *cor1 = NULL, *cor2 = NULL; ++ struct ubifs_mst_node *mst1 = NULL, *mst2 = NULL, *mst; ++ const int sz = c->mst_node_alsz; ++ int err, offs1, offs2; ++ ++ dbg_rcvry("recovery"); ++ ++ err = get_master_node(c, UBIFS_MST_LNUM, &buf1, &mst1, &cor1); ++ if (err) ++ goto out_free; ++ ++ err = get_master_node(c, UBIFS_MST_LNUM + 1, &buf2, &mst2, &cor2); ++ if (err) ++ goto out_free; ++ ++ if (mst1) { ++ offs1 = (void *)mst1 - buf1; ++ if ((le32_to_cpu(mst1->flags) & UBIFS_MST_RCVRY) && ++ (offs1 == 0 && !cor1)) { ++ /* ++ * mst1 was written by recovery at offset 0 with no ++ * corruption. ++ */ ++ dbg_rcvry("recovery recovery"); ++ mst = mst1; ++ } else if (mst2) { ++ offs2 = (void *)mst2 - buf2; ++ if (offs1 == offs2) { ++ /* Same offset, so must be the same */ ++ if (memcmp((void *)mst1 + UBIFS_CH_SZ, ++ (void *)mst2 + UBIFS_CH_SZ, ++ UBIFS_MST_NODE_SZ - UBIFS_CH_SZ)) ++ goto out_err; ++ mst = mst1; ++ } else if (offs2 + sz == offs1) { ++ /* 1st LEB was written, 2nd was not */ ++ if (cor1) ++ goto out_err; ++ mst = mst1; ++ } else if (offs1 == 0 && offs2 + sz >= c->leb_size) { ++ /* 1st LEB was unmapped and written, 2nd not */ ++ if (cor1) ++ goto out_err; ++ mst = mst1; ++ } else ++ goto out_err; ++ } else { ++ /* ++ * 2nd LEB was unmapped and about to be written, so ++ * there must be only one master node in the first LEB ++ * and no corruption. ++ */ ++ if (offs1 != 0 || cor1) ++ goto out_err; ++ mst = mst1; ++ } ++ } else { ++ if (!mst2) ++ goto out_err; ++ /* ++ * 1st LEB was unmapped and about to be written, so there must ++ * be no room left in 2nd LEB. ++ */ ++ offs2 = (void *)mst2 - buf2; ++ if (offs2 + sz + sz <= c->leb_size) ++ goto out_err; ++ mst = mst2; ++ } ++ ++ dbg_rcvry("recovered master node from LEB %d", ++ (mst == mst1 ? 
UBIFS_MST_LNUM : UBIFS_MST_LNUM + 1)); ++ ++ memcpy(c->mst_node, mst, UBIFS_MST_NODE_SZ); ++ ++ if ((c->vfs_sb->s_flags & MS_RDONLY)) { ++ /* Read-only mode. Keep a copy for switching to rw mode */ ++ c->rcvrd_mst_node = kmalloc(sz, GFP_KERNEL); ++ if (!c->rcvrd_mst_node) { ++ err = -ENOMEM; ++ goto out_free; ++ } ++ memcpy(c->rcvrd_mst_node, c->mst_node, UBIFS_MST_NODE_SZ); ++ } else { ++ /* Write the recovered master node */ ++ c->max_sqnum = le64_to_cpu(mst->ch.sqnum) - 1; ++ err = write_rcvrd_mst_node(c, c->mst_node); ++ if (err) ++ goto out_free; ++ } ++ ++ vfree(buf2); ++ vfree(buf1); ++ ++ return 0; ++ ++out_err: ++ err = -EINVAL; ++out_free: ++ ubifs_err("failed to recover master node"); ++ if (mst1) { ++ dbg_err("dumping first master node"); ++ dbg_dump_node(c, mst1); ++ } ++ if (mst2) { ++ dbg_err("dumping second master node"); ++ dbg_dump_node(c, mst2); ++ } ++ vfree(buf2); ++ vfree(buf1); ++ return err; ++} ++ ++/** ++ * ubifs_write_rcvrd_mst_node - write the recovered master node. ++ * @c: UBIFS file-system description object ++ * ++ * This function writes the master node that was recovered during mounting in ++ * read-only mode and must now be written because we are remounting rw. ++ * ++ * This function returns %0 on success and a negative error code on failure. ++ */ ++int ubifs_write_rcvrd_mst_node(struct ubifs_info *c) ++{ ++ int err; ++ ++ if (!c->rcvrd_mst_node) ++ return 0; ++ c->rcvrd_mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY); ++ c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY); ++ err = write_rcvrd_mst_node(c, c->rcvrd_mst_node); ++ if (err) ++ return err; ++ kfree(c->rcvrd_mst_node); ++ c->rcvrd_mst_node = NULL; ++ return 0; ++} ++ ++/** ++ * is_last_write - determine if an offset was in the last write to a LEB. 
++ * @c: UBIFS file-system description object ++ * @buf: buffer to check ++ * @offs: offset to check ++ * ++ * This function returns %1 if @offs was in the last write to the LEB whose data ++ * is in @buf, otherwise %0 is returned. The determination is made by checking ++ * for subsequent empty space starting from the next min_io_size boundary (or a ++ * bit less than the common header size if min_io_size is one). ++ */ ++static int is_last_write(const struct ubifs_info *c, void *buf, int offs) ++{ ++ int empty_offs; ++ int check_len; ++ uint8_t *p; ++ ++ if (c->min_io_size == 1) { ++ check_len = c->leb_size - offs; ++ p = buf + check_len; ++ for (; check_len > 0; check_len--) ++ if (*--p != 0xff) ++ break; ++ /* ++ * 'check_len' is the size of the corruption which cannot be ++ * more than the size of 1 node if it was caused by an unclean ++ * unmount. ++ */ ++ if (check_len > UBIFS_MAX_NODE_SZ) ++ return 0; ++ return 1; ++ } ++ ++ /* ++ * Round up to the next c->min_io_size boundary i.e. 'offs' is in the ++ * last wbuf written. After that should be empty space. ++ */ ++ empty_offs = ALIGN(offs + 1, c->min_io_size); ++ check_len = c->leb_size - empty_offs; ++ p = buf + empty_offs - offs; ++ ++ for (; check_len > 0; check_len--) ++ if (*p++ != 0xff) ++ return 0; ++ return 1; ++} ++ ++/** ++ * clean_buf - clean the data from an LEB sitting in a buffer. ++ * @c: UBIFS file-system description object ++ * @buf: buffer to clean ++ * @lnum: LEB number to clean ++ * @offs: offset from which to clean ++ * @len: length of buffer ++ * ++ * This function pads up to the next min_io_size boundary (if there is one) and ++ * sets empty space to all 0xff. @buf, @offs and @len are updated to the next ++ * min_io_size boundary (if there is one). 
++ */ ++static void clean_buf(const struct ubifs_info *c, void **buf, int lnum, ++ int *offs, int *len) ++{ ++ int empty_offs, pad_len; ++ ++ lnum = lnum; ++ dbg_rcvry("cleaning corruption at %d:%d", lnum, *offs); ++ ++ if (c->min_io_size == 1) { ++ memset(*buf, 0xff, c->leb_size - *offs); ++ return; ++ } ++ ++ ubifs_assert(!(*offs & 7)); ++ empty_offs = ALIGN(*offs, c->min_io_size); ++ pad_len = empty_offs - *offs; ++ ubifs_pad(c, *buf, pad_len); ++ *offs += pad_len; ++ *buf += pad_len; ++ *len -= pad_len; ++ memset(*buf, 0xff, c->leb_size - empty_offs); ++} ++ ++/** ++ * no_more_nodes - determine if there are no more nodes in a buffer. ++ * @c: UBIFS file-system description object ++ * @buf: buffer to check ++ * @len: length of buffer ++ * @lnum: LEB number of the LEB from which @buf was read ++ * @offs: offset from which @buf was read ++ * ++ * This function ensures that the corrupted node at @offs is the last thing ++ * written to a LEB. This function returns %1 if more data is not found and ++ * %0 if more data is found. ++ */ ++static int no_more_nodes(const struct ubifs_info *c, void *buf, int len, ++ int lnum, int offs) ++{ ++ struct ubifs_ch *ch = buf; ++ int skip, dlen = le32_to_cpu(ch->len); ++ ++ /* Check for empty space after the corrupt node's common header */ ++ skip = ALIGN(offs + UBIFS_CH_SZ, c->min_io_size) - offs; ++ if (is_empty(buf + skip, len - skip)) ++ return 1; ++ /* ++ * The area after the common header size is not empty, so the common ++ * header must be intact. Check it. 
++ */ ++ if (ubifs_check_node(c, buf, lnum, offs, 1, 0) != -EUCLEAN) { ++ dbg_rcvry("unexpected bad common header at %d:%d", lnum, offs); ++ return 0; ++ } ++ /* Now we know the corrupt node's length we can skip over it */ ++ skip = ALIGN(offs + dlen, c->min_io_size) - offs; ++ /* After which there should be empty space */ ++ if (is_empty(buf + skip, len - skip)) ++ return 1; ++ dbg_rcvry("unexpected data at %d:%d", lnum, offs + skip); ++ return 0; ++} ++ ++/** ++ * fix_unclean_leb - fix an unclean LEB. ++ * @c: UBIFS file-system description object ++ * @sleb: scanned LEB information ++ * @start: offset where scan started ++ */ ++static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb, ++ int start) ++{ ++ int lnum = sleb->lnum, endpt = start; ++ ++ /* Get the end offset of the last node we are keeping */ ++ if (!list_empty(&sleb->nodes)) { ++ struct ubifs_scan_node *snod; ++ ++ snod = list_entry(sleb->nodes.prev, ++ struct ubifs_scan_node, list); ++ endpt = snod->offs + snod->len; ++ } ++ ++ if ((c->vfs_sb->s_flags & MS_RDONLY) && !c->remounting_rw) { ++ /* Add to recovery list */ ++ struct ubifs_unclean_leb *ucleb; ++ ++ dbg_rcvry("need to fix LEB %d start %d endpt %d", ++ lnum, start, sleb->endpt); ++ ucleb = kzalloc(sizeof(struct ubifs_unclean_leb), GFP_NOFS); ++ if (!ucleb) ++ return -ENOMEM; ++ ucleb->lnum = lnum; ++ ucleb->endpt = endpt; ++ list_add_tail(&ucleb->list, &c->unclean_leb_list); ++ } else { ++ /* Write the fixed LEB back to flash */ ++ int err; ++ ++ dbg_rcvry("fixing LEB %d start %d endpt %d", ++ lnum, start, sleb->endpt); ++ if (endpt == 0) { ++ err = ubifs_leb_unmap(c, lnum); ++ if (err) ++ return err; ++ } else { ++ int len = ALIGN(endpt, c->min_io_size); ++ ++ if (start) { ++ err = ubi_read(c->ubi, lnum, sleb->buf, 0, ++ start); ++ if (err) ++ return err; ++ } ++ /* Pad to min_io_size */ ++ if (len > endpt) { ++ int pad_len = len - ALIGN(endpt, 8); ++ ++ if (pad_len > 0) { ++ void *buf = sleb->buf + len - pad_len; ++ ++ 
ubifs_pad(c, buf, pad_len); ++ } ++ } ++ err = ubi_leb_change(c->ubi, lnum, sleb->buf, len, ++ UBI_UNKNOWN); ++ if (err) ++ return err; ++ } ++ } ++ return 0; ++} ++ ++/** ++ * drop_incomplete_group - drop nodes from an incomplete group. ++ * @sleb: scanned LEB information ++ * @offs: offset of dropped nodes is returned here ++ * ++ * This function returns %1 if nodes are dropped and %0 otherwise. ++ */ ++static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs) ++{ ++ int dropped = 0; ++ ++ while (!list_empty(&sleb->nodes)) { ++ struct ubifs_scan_node *snod; ++ struct ubifs_ch *ch; ++ ++ snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, ++ list); ++ ch = snod->node; ++ if (ch->group_type != UBIFS_IN_NODE_GROUP) ++ return dropped; ++ dbg_rcvry("dropping node at %d:%d", sleb->lnum, snod->offs); ++ *offs = snod->offs; ++ list_del(&snod->list); ++ kfree(snod); ++ sleb->nodes_cnt -= 1; ++ dropped = 1; ++ } ++ return dropped; ++} ++ ++/** ++ * ubifs_recover_leb - scan and recover a LEB. ++ * @c: UBIFS file-system description object ++ * @lnum: LEB number ++ * @offs: offset ++ * @sbuf: LEB-sized buffer to use ++ * @grouped: nodes may be grouped for recovery ++ * ++ * This function does a scan of a LEB, but caters for errors that might have ++ * been caused by the unclean unmount from which we are attempting to recover. ++ * ++ * This function returns %0 on success and a negative error code on failure. 
++ */ ++struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, ++ int offs, void *sbuf, int grouped) ++{ ++ int err, len = c->leb_size - offs, need_clean = 0, quiet = 1; ++ int empty_chkd = 0, start = offs; ++ struct ubifs_scan_leb *sleb; ++ void *buf = sbuf + offs; ++ ++ dbg_rcvry("%d:%d", lnum, offs); ++ ++ sleb = ubifs_start_scan(c, lnum, offs, sbuf); ++ if (IS_ERR(sleb)) ++ return sleb; ++ ++ if (sleb->ecc) ++ need_clean = 1; ++ ++ while (len >= 8) { ++ int ret; ++ ++ dbg_scan("look at LEB %d:%d (%d bytes left)", ++ lnum, offs, len); ++ ++ cond_resched(); ++ ++ /* ++ * Scan quietly until there is an error from which we cannot ++ * recover ++ */ ++ ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet); ++ ++ if (ret == SCANNED_A_NODE) { ++ /* A valid node, and not a padding node */ ++ struct ubifs_ch *ch = buf; ++ int node_len; ++ ++ err = ubifs_add_snod(c, sleb, buf, offs); ++ if (err) ++ goto error; ++ node_len = ALIGN(le32_to_cpu(ch->len), 8); ++ offs += node_len; ++ buf += node_len; ++ len -= node_len; ++ continue; ++ } ++ ++ if (ret > 0) { ++ /* Padding bytes or a valid padding node */ ++ offs += ret; ++ buf += ret; ++ len -= ret; ++ continue; ++ } ++ ++ if (ret == SCANNED_EMPTY_SPACE) { ++ if (!is_empty(buf, len)) { ++ if (!is_last_write(c, buf, offs)) ++ break; ++ clean_buf(c, &buf, lnum, &offs, &len); ++ need_clean = 1; ++ } ++ empty_chkd = 1; ++ break; ++ } ++ ++ if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE) ++ if (is_last_write(c, buf, offs)) { ++ clean_buf(c, &buf, lnum, &offs, &len); ++ need_clean = 1; ++ empty_chkd = 1; ++ break; ++ } ++ ++ if (ret == SCANNED_A_CORRUPT_NODE) ++ if (no_more_nodes(c, buf, len, lnum, offs)) { ++ clean_buf(c, &buf, lnum, &offs, &len); ++ need_clean = 1; ++ empty_chkd = 1; ++ break; ++ } ++ ++ if (quiet) { ++ /* Redo the last scan but noisily */ ++ quiet = 0; ++ continue; ++ } ++ ++ switch (ret) { ++ case SCANNED_GARBAGE: ++ dbg_err("garbage"); ++ goto corrupted; ++ case 
SCANNED_A_CORRUPT_NODE: ++ case SCANNED_A_BAD_PAD_NODE: ++ dbg_err("bad node"); ++ goto corrupted; ++ default: ++ dbg_err("unknown"); ++ goto corrupted; ++ } ++ } ++ ++ if (!empty_chkd && !is_empty(buf, len)) { ++ if (is_last_write(c, buf, offs)) { ++ clean_buf(c, &buf, lnum, &offs, &len); ++ need_clean = 1; ++ } else { ++ ubifs_err("corrupt empty space at LEB %d:%d", ++ lnum, offs); ++ goto corrupted; ++ } ++ } ++ ++ /* Drop nodes from incomplete group */ ++ if (grouped && drop_incomplete_group(sleb, &offs)) { ++ buf = sbuf + offs; ++ len = c->leb_size - offs; ++ clean_buf(c, &buf, lnum, &offs, &len); ++ need_clean = 1; ++ } ++ ++ if (offs % c->min_io_size) { ++ clean_buf(c, &buf, lnum, &offs, &len); ++ need_clean = 1; ++ } ++ ++ ubifs_end_scan(c, sleb, lnum, offs); ++ ++ if (need_clean) { ++ err = fix_unclean_leb(c, sleb, start); ++ if (err) ++ goto error; ++ } ++ ++ return sleb; ++ ++corrupted: ++ ubifs_scanned_corruption(c, lnum, offs, buf); ++ err = -EUCLEAN; ++error: ++ ubifs_err("LEB %d scanning failed", lnum); ++ ubifs_scan_destroy(sleb); ++ return ERR_PTR(err); ++} ++ ++/** ++ * get_cs_sqnum - get commit start sequence number. ++ * @c: UBIFS file-system description object ++ * @lnum: LEB number of commit start node ++ * @offs: offset of commit start node ++ * @cs_sqnum: commit start sequence number is returned here ++ * ++ * This function returns %0 on success and a negative error code on failure. 
++ */ ++static int get_cs_sqnum(struct ubifs_info *c, int lnum, int offs, ++ unsigned long long *cs_sqnum) ++{ ++ struct ubifs_cs_node *cs_node = NULL; ++ int err, ret; ++ ++ dbg_rcvry("at %d:%d", lnum, offs); ++ cs_node = kmalloc(UBIFS_CS_NODE_SZ, GFP_KERNEL); ++ if (!cs_node) ++ return -ENOMEM; ++ if (c->leb_size - offs < UBIFS_CS_NODE_SZ) ++ goto out_err; ++ err = ubi_read(c->ubi, lnum, (void *)cs_node, offs, UBIFS_CS_NODE_SZ); ++ if (err && err != -EBADMSG) ++ goto out_free; ++ ret = ubifs_scan_a_node(c, cs_node, UBIFS_CS_NODE_SZ, lnum, offs, 0); ++ if (ret != SCANNED_A_NODE) { ++ dbg_err("Not a valid node"); ++ goto out_err; ++ } ++ if (cs_node->ch.node_type != UBIFS_CS_NODE) { ++ dbg_err("Node a CS node, type is %d", cs_node->ch.node_type); ++ goto out_err; ++ } ++ if (le64_to_cpu(cs_node->cmt_no) != c->cmt_no) { ++ dbg_err("CS node cmt_no %llu != current cmt_no %llu", ++ (unsigned long long)le64_to_cpu(cs_node->cmt_no), ++ c->cmt_no); ++ goto out_err; ++ } ++ *cs_sqnum = le64_to_cpu(cs_node->ch.sqnum); ++ dbg_rcvry("commit start sqnum %llu", *cs_sqnum); ++ kfree(cs_node); ++ return 0; ++ ++out_err: ++ err = -EINVAL; ++out_free: ++ ubifs_err("failed to get CS sqnum"); ++ kfree(cs_node); ++ return err; ++} ++ ++/** ++ * ubifs_recover_log_leb - scan and recover a log LEB. ++ * @c: UBIFS file-system description object ++ * @lnum: LEB number ++ * @offs: offset ++ * @sbuf: LEB-sized buffer to use ++ * ++ * This function does a scan of a LEB, but caters for errors that might have ++ * been caused by the unclean unmount from which we are attempting to recover. ++ * ++ * This function returns %0 on success and a negative error code on failure. 
++ */ ++struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, ++ int offs, void *sbuf) ++{ ++ struct ubifs_scan_leb *sleb; ++ int next_lnum; ++ ++ dbg_rcvry("LEB %d", lnum); ++ next_lnum = lnum + 1; ++ if (next_lnum >= UBIFS_LOG_LNUM + c->log_lebs) ++ next_lnum = UBIFS_LOG_LNUM; ++ if (next_lnum != c->ltail_lnum) { ++ /* ++ * We can only recover at the end of the log, so check that the ++ * next log LEB is empty or out of date. ++ */ ++ sleb = ubifs_scan(c, next_lnum, 0, sbuf); ++ if (IS_ERR(sleb)) ++ return sleb; ++ if (sleb->nodes_cnt) { ++ struct ubifs_scan_node *snod; ++ unsigned long long cs_sqnum = c->cs_sqnum; ++ ++ snod = list_entry(sleb->nodes.next, ++ struct ubifs_scan_node, list); ++ if (cs_sqnum == 0) { ++ int err; ++ ++ err = get_cs_sqnum(c, lnum, offs, &cs_sqnum); ++ if (err) { ++ ubifs_scan_destroy(sleb); ++ return ERR_PTR(err); ++ } ++ } ++ if (snod->sqnum > cs_sqnum) { ++ ubifs_err("unrecoverable log corruption " ++ "in LEB %d", lnum); ++ ubifs_scan_destroy(sleb); ++ return ERR_PTR(-EUCLEAN); ++ } ++ } ++ ubifs_scan_destroy(sleb); ++ } ++ return ubifs_recover_leb(c, lnum, offs, sbuf, 0); ++} ++ ++/** ++ * recover_head - recover a head. ++ * @c: UBIFS file-system description object ++ * @lnum: LEB number of head to recover ++ * @offs: offset of head to recover ++ * @sbuf: LEB-sized buffer to use ++ * ++ * This function ensures that there is no data on the flash at a head location. ++ * ++ * This function returns %0 on success and a negative error code on failure. 
++ */ ++static int recover_head(const struct ubifs_info *c, int lnum, int offs, ++ void *sbuf) ++{ ++ int len, err, need_clean = 0; ++ ++ if (c->min_io_size > 1) ++ len = c->min_io_size; ++ else ++ len = 512; ++ if (offs + len > c->leb_size) ++ len = c->leb_size - offs; ++ ++ if (!len) ++ return 0; ++ ++ /* Read at the head location and check it is empty flash */ ++ err = ubi_read(c->ubi, lnum, sbuf, offs, len); ++ if (err) ++ need_clean = 1; ++ else { ++ uint8_t *p = sbuf; ++ ++ while (len--) ++ if (*p++ != 0xff) { ++ need_clean = 1; ++ break; ++ } ++ } ++ ++ if (need_clean) { ++ dbg_rcvry("cleaning head at %d:%d", lnum, offs); ++ if (offs == 0) ++ return ubifs_leb_unmap(c, lnum); ++ err = ubi_read(c->ubi, lnum, sbuf, 0, offs); ++ if (err) ++ return err; ++ return ubi_leb_change(c->ubi, lnum, sbuf, offs, UBI_UNKNOWN); ++ } ++ ++ return 0; ++} ++ ++/** ++ * ubifs_recover_inl_heads - recover index and LPT heads. ++ * @c: UBIFS file-system description object ++ * @sbuf: LEB-sized buffer to use ++ * ++ * This function ensures that there is no data on the flash at the index and ++ * LPT head locations. ++ * ++ * This deals with the recovery of a half-completed journal commit. UBIFS is ++ * careful never to overwrite the last version of the index or the LPT. Because ++ * the index and LPT are wandering trees, data from a half-completed commit will ++ * not be referenced anywhere in UBIFS. The data will be either in LEBs that are ++ * assumed to be empty and will be unmapped anyway before use, or in the index ++ * and LPT heads. ++ * ++ * This function returns %0 on success and a negative error code on failure. 
++ */ ++int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf) ++{ ++ int err; ++ ++ ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY) || c->remounting_rw); ++ ++ dbg_rcvry("checking index head at %d:%d", c->ihead_lnum, c->ihead_offs); ++ err = recover_head(c, c->ihead_lnum, c->ihead_offs, sbuf); ++ if (err) ++ return err; ++ ++ dbg_rcvry("checking LPT head at %d:%d", c->nhead_lnum, c->nhead_offs); ++ err = recover_head(c, c->nhead_lnum, c->nhead_offs, sbuf); ++ if (err) ++ return err; ++ ++ return 0; ++} ++ ++/** ++ * clean_an_unclean_leb - read and write a LEB to remove corruption. ++ * @c: UBIFS file-system description object ++ * @ucleb: unclean LEB information ++ * @sbuf: LEB-sized buffer to use ++ * ++ * This function reads a LEB up to a point pre-determined by the mount recovery, ++ * checks the nodes, and writes the result back to the flash, thereby cleaning ++ * off any following corruption, or non-fatal ECC errors. ++ * ++ * This function returns %0 on success and a negative error code on failure. 
++ */ ++static int clean_an_unclean_leb(const struct ubifs_info *c, ++ struct ubifs_unclean_leb *ucleb, void *sbuf) ++{ ++ int err, lnum = ucleb->lnum, offs = 0, len = ucleb->endpt, quiet = 1; ++ void *buf = sbuf; ++ ++ dbg_rcvry("LEB %d len %d", lnum, len); ++ ++ if (len == 0) { ++ /* Nothing to read, just unmap it */ ++ err = ubifs_leb_unmap(c, lnum); ++ if (err) ++ return err; ++ return 0; ++ } ++ ++ err = ubi_read(c->ubi, lnum, buf, offs, len); ++ if (err && err != -EBADMSG) ++ return err; ++ ++ while (len >= 8) { ++ int ret; ++ ++ cond_resched(); ++ ++ /* Scan quietly until there is an error */ ++ ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet); ++ ++ if (ret == SCANNED_A_NODE) { ++ /* A valid node, and not a padding node */ ++ struct ubifs_ch *ch = buf; ++ int node_len; ++ ++ node_len = ALIGN(le32_to_cpu(ch->len), 8); ++ offs += node_len; ++ buf += node_len; ++ len -= node_len; ++ continue; ++ } ++ ++ if (ret > 0) { ++ /* Padding bytes or a valid padding node */ ++ offs += ret; ++ buf += ret; ++ len -= ret; ++ continue; ++ } ++ ++ if (ret == SCANNED_EMPTY_SPACE) { ++ ubifs_err("unexpected empty space at %d:%d", ++ lnum, offs); ++ return -EUCLEAN; ++ } ++ ++ if (quiet) { ++ /* Redo the last scan but noisily */ ++ quiet = 0; ++ continue; ++ } ++ ++ ubifs_scanned_corruption(c, lnum, offs, buf); ++ return -EUCLEAN; ++ } ++ ++ /* Pad to min_io_size */ ++ len = ALIGN(ucleb->endpt, c->min_io_size); ++ if (len > ucleb->endpt) { ++ int pad_len = len - ALIGN(ucleb->endpt, 8); ++ ++ if (pad_len > 0) { ++ buf = c->sbuf + len - pad_len; ++ ubifs_pad(c, buf, pad_len); ++ } ++ } ++ ++ /* Write back the LEB atomically */ ++ err = ubi_leb_change(c->ubi, lnum, sbuf, len, UBI_UNKNOWN); ++ if (err) ++ return err; ++ ++ dbg_rcvry("cleaned LEB %d", lnum); ++ ++ return 0; ++} ++ ++/** ++ * ubifs_clean_lebs - clean LEBs recovered during read-only mount. 
++ * @c: UBIFS file-system description object ++ * @sbuf: LEB-sized buffer to use ++ * ++ * This function cleans a LEB identified during recovery that needs to be ++ * written but was not because UBIFS was mounted read-only. This happens when ++ * remounting to read-write mode. ++ * ++ * This function returns %0 on success and a negative error code on failure. ++ */ ++int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf) ++{ ++ dbg_rcvry("recovery"); ++ while (!list_empty(&c->unclean_leb_list)) { ++ struct ubifs_unclean_leb *ucleb; ++ int err; ++ ++ ucleb = list_entry(c->unclean_leb_list.next, ++ struct ubifs_unclean_leb, list); ++ err = clean_an_unclean_leb(c, ucleb, sbuf); ++ if (err) ++ return err; ++ list_del(&ucleb->list); ++ kfree(ucleb); ++ } ++ return 0; ++} ++ ++/** ++ * ubifs_rcvry_gc_commit - recover the GC LEB number and run the commit. ++ * @c: UBIFS file-system description object ++ * ++ * Out-of-place garbage collection requires always one empty LEB with which to ++ * start garbage collection. The LEB number is recorded in c->gc_lnum and is ++ * written to the master node on unmounting. In the case of an unclean unmount ++ * the value of gc_lnum recorded in the master node is out of date and cannot ++ * be used. Instead, recovery must allocate an empty LEB for this purpose. ++ * However, there may not be enough empty space, in which case it must be ++ * possible to GC the dirtiest LEB into the GC head LEB. ++ * ++ * This function also runs the commit which causes the TNC updates from ++ * size-recovery and orphans to be written to the flash. That is important to ++ * ensure correct replay order for subsequent mounts. ++ * ++ * This function returns %0 on success and a negative error code on failure. 
++ */ ++int ubifs_rcvry_gc_commit(struct ubifs_info *c) ++{ ++ struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; ++ struct ubifs_lprops lp; ++ int lnum, err; ++ ++ c->gc_lnum = -1; ++ if (wbuf->lnum == -1) { ++ dbg_rcvry("no GC head LEB"); ++ goto find_free; ++ } ++ /* ++ * See whether the used space in the dirtiest LEB fits in the GC head ++ * LEB. ++ */ ++ if (wbuf->offs == c->leb_size) { ++ dbg_rcvry("no room in GC head LEB"); ++ goto find_free; ++ } ++ err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2); ++ if (err) { ++ if (err == -ENOSPC) ++ dbg_err("could not find a dirty LEB"); ++ return err; ++ } ++ ubifs_assert(!(lp.flags & LPROPS_INDEX)); ++ lnum = lp.lnum; ++ if (lp.free + lp.dirty == c->leb_size) { ++ /* An empty LEB was returned */ ++ if (lp.free != c->leb_size) { ++ err = ubifs_change_one_lp(c, lnum, c->leb_size, ++ 0, 0, 0, 0); ++ if (err) ++ return err; ++ } ++ err = ubifs_leb_unmap(c, lnum); ++ if (err) ++ return err; ++ c->gc_lnum = lnum; ++ dbg_rcvry("allocated LEB %d for GC", lnum); ++ /* Run the commit */ ++ dbg_rcvry("committing"); ++ return ubifs_run_commit(c); ++ } ++ /* ++ * There was no empty LEB so the used space in the dirtiest LEB must fit ++ * in the GC head LEB. ++ */ ++ if (lp.free + lp.dirty < wbuf->offs) { ++ dbg_rcvry("LEB %d doesn't fit in GC head LEB %d:%d", ++ lnum, wbuf->lnum, wbuf->offs); ++ err = ubifs_return_leb(c, lnum); ++ if (err) ++ return err; ++ goto find_free; ++ } ++ /* ++ * We run the commit before garbage collection otherwise subsequent ++ * mounts will see the GC and orphan deletion in a different order. ++ */ ++ dbg_rcvry("committing"); ++ err = ubifs_run_commit(c); ++ if (err) ++ return err; ++ /* ++ * The data in the dirtiest LEB fits in the GC head LEB, so do the GC ++ * - use locking to keep 'ubifs_assert()' happy. 
++ */ ++ dbg_rcvry("GC'ing LEB %d", lnum); ++ mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); ++ err = ubifs_garbage_collect_leb(c, &lp); ++ if (err >= 0) { ++ int err2 = ubifs_wbuf_sync_nolock(wbuf); ++ ++ if (err2) ++ err = err2; ++ } ++ mutex_unlock(&wbuf->io_mutex); ++ if (err < 0) { ++ dbg_err("GC failed, error %d", err); ++ if (err == -EAGAIN) ++ err = -EINVAL; ++ return err; ++ } ++ if (err != LEB_RETAINED) { ++ dbg_err("GC returned %d", err); ++ return -EINVAL; ++ } ++ err = ubifs_leb_unmap(c, c->gc_lnum); ++ if (err) ++ return err; ++ dbg_rcvry("allocated LEB %d for GC", lnum); ++ return 0; ++ ++find_free: ++ /* ++ * There is no GC head LEB or the free space in the GC head LEB is too ++ * small. Allocate gc_lnum by calling 'ubifs_find_free_leb_for_idx()' so ++ * GC is not run. ++ */ ++ lnum = ubifs_find_free_leb_for_idx(c); ++ if (lnum < 0) { ++ dbg_err("could not find an empty LEB"); ++ return lnum; ++ } ++ /* And reset the index flag */ ++ err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0, ++ LPROPS_INDEX, 0); ++ if (err) ++ return err; ++ c->gc_lnum = lnum; ++ dbg_rcvry("allocated LEB %d for GC", lnum); ++ /* Run the commit */ ++ dbg_rcvry("committing"); ++ return ubifs_run_commit(c); ++} ++ ++/** ++ * struct size_entry - inode size information for recovery. ++ * @rb: link in the RB-tree of sizes ++ * @inum: inode number ++ * @i_size: size on inode ++ * @d_size: maximum size based on data nodes ++ * @exists: indicates whether the inode exists ++ * @inode: inode if pinned in memory awaiting rw mode to fix it ++ */ ++struct size_entry { ++ struct rb_node rb; ++ ino_t inum; ++ loff_t i_size; ++ loff_t d_size; ++ int exists; ++ struct inode *inode; ++}; ++ ++/** ++ * add_ino - add an entry to the size tree. 
++ * @c: UBIFS file-system description object ++ * @inum: inode number ++ * @i_size: size on inode ++ * @d_size: maximum size based on data nodes ++ * @exists: indicates whether the inode exists ++ */ ++static int add_ino(struct ubifs_info *c, ino_t inum, loff_t i_size, ++ loff_t d_size, int exists) ++{ ++ struct rb_node **p = &c->size_tree.rb_node, *parent = NULL; ++ struct size_entry *e; ++ ++ while (*p) { ++ parent = *p; ++ e = rb_entry(parent, struct size_entry, rb); ++ if (inum < e->inum) ++ p = &(*p)->rb_left; ++ else ++ p = &(*p)->rb_right; ++ } ++ ++ e = kzalloc(sizeof(struct size_entry), GFP_KERNEL); ++ if (!e) ++ return -ENOMEM; ++ ++ e->inum = inum; ++ e->i_size = i_size; ++ e->d_size = d_size; ++ e->exists = exists; ++ ++ rb_link_node(&e->rb, parent, p); ++ rb_insert_color(&e->rb, &c->size_tree); ++ ++ return 0; ++} ++ ++/** ++ * find_ino - find an entry on the size tree. ++ * @c: UBIFS file-system description object ++ * @inum: inode number ++ */ ++static struct size_entry *find_ino(struct ubifs_info *c, ino_t inum) ++{ ++ struct rb_node *p = c->size_tree.rb_node; ++ struct size_entry *e; ++ ++ while (p) { ++ e = rb_entry(p, struct size_entry, rb); ++ if (inum < e->inum) ++ p = p->rb_left; ++ else if (inum > e->inum) ++ p = p->rb_right; ++ else ++ return e; ++ } ++ return NULL; ++} ++ ++/** ++ * remove_ino - remove an entry from the size tree. ++ * @c: UBIFS file-system description object ++ * @inum: inode number ++ */ ++static void remove_ino(struct ubifs_info *c, ino_t inum) ++{ ++ struct size_entry *e = find_ino(c, inum); ++ ++ if (!e) ++ return; ++ rb_erase(&e->rb, &c->size_tree); ++ kfree(e); ++} ++ ++/** ++ * ubifs_destroy_size_tree - free resources related to the size tree. 
++ * @c: UBIFS file-system description object ++ */ ++void ubifs_destroy_size_tree(struct ubifs_info *c) ++{ ++ struct rb_node *this = c->size_tree.rb_node; ++ struct size_entry *e; ++ ++ while (this) { ++ if (this->rb_left) { ++ this = this->rb_left; ++ continue; ++ } else if (this->rb_right) { ++ this = this->rb_right; ++ continue; ++ } ++ e = rb_entry(this, struct size_entry, rb); ++ if (e->inode) ++ iput(e->inode); ++ this = rb_parent(this); ++ if (this) { ++ if (this->rb_left == &e->rb) ++ this->rb_left = NULL; ++ else ++ this->rb_right = NULL; ++ } ++ kfree(e); ++ } ++ c->size_tree = RB_ROOT; ++} ++ ++/** ++ * ubifs_recover_size_accum - accumulate inode sizes for recovery. ++ * @c: UBIFS file-system description object ++ * @key: node key ++ * @deletion: node is for a deletion ++ * @new_size: inode size ++ * ++ * This function has two purposes: ++ * 1) to ensure there are no data nodes that fall outside the inode size ++ * 2) to ensure there are no data nodes for inodes that do not exist ++ * To accomplish those purposes, a rb-tree is constructed containing an entry ++ * for each inode number in the journal that has not been deleted, and recording ++ * the size from the inode node, the maximum size of any data node (also altered ++ * by truncations) and a flag indicating a inode number for which no inode node ++ * was present in the journal. ++ * ++ * Note that there is still the possibility that there are data nodes that have ++ * been committed that are beyond the inode size, however the only way to find ++ * them would be to scan the entire index. Alternatively, some provision could ++ * be made to record the size of inodes at the start of commit, which would seem ++ * very cumbersome for a scenario that is quite unlikely and the only negative ++ * consequence of which is wasted space. ++ * ++ * This functions returns %0 on success and a negative error code on failure. 
++ */ ++int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key, ++ int deletion, loff_t new_size) ++{ ++ ino_t inum = key_inum(c, key); ++ struct size_entry *e; ++ int err; ++ ++ switch (key_type(c, key)) { ++ case UBIFS_INO_KEY: ++ if (deletion) ++ remove_ino(c, inum); ++ else { ++ e = find_ino(c, inum); ++ if (e) { ++ e->i_size = new_size; ++ e->exists = 1; ++ } else { ++ err = add_ino(c, inum, new_size, 0, 1); ++ if (err) ++ return err; ++ } ++ } ++ break; ++ case UBIFS_DATA_KEY: ++ e = find_ino(c, inum); ++ if (e) { ++ if (new_size > e->d_size) ++ e->d_size = new_size; ++ } else { ++ err = add_ino(c, inum, 0, new_size, 0); ++ if (err) ++ return err; ++ } ++ break; ++ case UBIFS_TRUN_KEY: ++ e = find_ino(c, inum); ++ if (e) ++ e->d_size = new_size; ++ break; ++ } ++ return 0; ++} ++ ++/** ++ * fix_size_in_place - fix inode size in place on flash. ++ * @c: UBIFS file-system description object ++ * @e: inode size information for recovery ++ */ ++static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e) ++{ ++ struct ubifs_ino_node *ino = c->sbuf; ++ unsigned char *p; ++ union ubifs_key key; ++ int err, lnum, offs, len; ++ loff_t i_size; ++ uint32_t crc; ++ ++ /* Locate the inode node LEB number and offset */ ++ ino_key_init(c, &key, e->inum); ++ err = ubifs_tnc_locate(c, &key, ino, &lnum, &offs); ++ if (err) ++ goto out; ++ /* ++ * If the size recorded on the inode node is greater than the size that ++ * was calculated from nodes in the journal then don't change the inode. 
++ */ ++ i_size = le64_to_cpu(ino->size); ++ if (i_size >= e->d_size) ++ return 0; ++ /* Read the LEB */ ++ err = ubi_read(c->ubi, lnum, c->sbuf, 0, c->leb_size); ++ if (err) ++ goto out; ++ /* Change the size field and recalculate the CRC */ ++ ino = c->sbuf + offs; ++ ino->size = cpu_to_le64(e->d_size); ++ len = le32_to_cpu(ino->ch.len); ++ crc = crc32(UBIFS_CRC32_INIT, (void *)ino + 8, len - 8); ++ ino->ch.crc = cpu_to_le32(crc); ++ /* Work out where data in the LEB ends and free space begins */ ++ p = c->sbuf; ++ len = c->leb_size - 1; ++ while (p[len] == 0xff) ++ len -= 1; ++ len = ALIGN(len + 1, c->min_io_size); ++ /* Atomically write the fixed LEB back again */ ++ err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN); ++ if (err) ++ goto out; ++ dbg_rcvry("inode %lu at %d:%d size %lld -> %lld ", ++ (unsigned long)e->inum, lnum, offs, i_size, e->d_size); ++ return 0; ++ ++out: ++ ubifs_warn("inode %lu failed to fix size %lld -> %lld error %d", ++ (unsigned long)e->inum, e->i_size, e->d_size, err); ++ return err; ++} ++ ++/** ++ * ubifs_recover_size - recover inode size. ++ * @c: UBIFS file-system description object ++ * ++ * This function attempts to fix inode size discrepancies identified by the ++ * 'ubifs_recover_size_accum()' function. ++ * ++ * This functions returns %0 on success and a negative error code on failure. 
++ */ ++int ubifs_recover_size(struct ubifs_info *c) ++{ ++ struct rb_node *this = rb_first(&c->size_tree); ++ ++ while (this) { ++ struct size_entry *e; ++ int err; ++ ++ e = rb_entry(this, struct size_entry, rb); ++ if (!e->exists) { ++ union ubifs_key key; ++ ++ ino_key_init(c, &key, e->inum); ++ err = ubifs_tnc_lookup(c, &key, c->sbuf); ++ if (err && err != -ENOENT) ++ return err; ++ if (err == -ENOENT) { ++ /* Remove data nodes that have no inode */ ++ dbg_rcvry("removing ino %lu", ++ (unsigned long)e->inum); ++ err = ubifs_tnc_remove_ino(c, e->inum); ++ if (err) ++ return err; ++ } else { ++ struct ubifs_ino_node *ino = c->sbuf; ++ ++ e->exists = 1; ++ e->i_size = le64_to_cpu(ino->size); ++ } ++ } ++ if (e->exists && e->i_size < e->d_size) { ++ if (!e->inode && (c->vfs_sb->s_flags & MS_RDONLY)) { ++ /* Fix the inode size and pin it in memory */ ++ struct inode *inode; ++ ++ inode = ubifs_iget(c->vfs_sb, e->inum); ++ if (IS_ERR(inode)) ++ return PTR_ERR(inode); ++ if (inode->i_size < e->d_size) { ++ dbg_rcvry("ino %lu size %lld -> %lld", ++ (unsigned long)e->inum, ++ e->d_size, inode->i_size); ++ inode->i_size = e->d_size; ++ ubifs_inode(inode)->ui_size = e->d_size; ++ e->inode = inode; ++ this = rb_next(this); ++ continue; ++ } ++ iput(inode); ++ } else { ++ /* Fix the size in place */ ++ err = fix_size_in_place(c, e); ++ if (err) ++ return err; ++ if (e->inode) ++ iput(e->inode); ++ } ++ } ++ this = rb_next(this); ++ rb_erase(&e->rb, &c->size_tree); ++ kfree(e); ++ } ++ return 0; ++} +diff -Nurd linux-2.6.24/fs/ubifs/replay.c ubifs-v2.6.24/fs/ubifs/replay.c +--- linux-2.6.24/fs/ubifs/replay.c 1970-01-01 02:00:00.000000000 +0200 ++++ ubifs-v2.6.24/fs/ubifs/replay.c 2009-04-07 17:14:47.000000000 +0200 +@@ -0,0 +1,1084 @@ ++/* ++ * This file is part of UBIFS. ++ * ++ * Copyright (C) 2006-2008 Nokia Corporation. 
++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published by ++ * the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, write to the Free Software Foundation, Inc., 51 ++ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ * ++ * Authors: Adrian Hunter ++ * Artem Bityutskiy (Битюцкий Артём) ++ */ ++ ++/* ++ * This file contains journal replay code. It runs when the file-system is being ++ * mounted and requires no locking. ++ * ++ * The larger is the journal, the longer it takes to scan it, so the longer it ++ * takes to mount UBIFS. This is why the journal has limited size which may be ++ * changed depending on the system requirements. But a larger journal gives ++ * faster I/O speed because it writes the index less frequently. So this is a ++ * trade-off. Also, the journal is indexed by the in-memory index (TNC), so the ++ * larger is the journal, the more memory its index may consume. ++ */ ++ ++#include "ubifs.h" ++ ++/* ++ * Replay flags. ++ * ++ * REPLAY_DELETION: node was deleted ++ * REPLAY_REF: node is a reference node ++ */ ++enum { ++ REPLAY_DELETION = 1, ++ REPLAY_REF = 2, ++}; ++ ++/** ++ * struct replay_entry - replay tree entry. 
++ * @lnum: logical eraseblock number of the node ++ * @offs: node offset ++ * @len: node length ++ * @sqnum: node sequence number ++ * @flags: replay flags ++ * @rb: links the replay tree ++ * @key: node key ++ * @nm: directory entry name ++ * @old_size: truncation old size ++ * @new_size: truncation new size ++ * @free: amount of free space in a bud ++ * @dirty: amount of dirty space in a bud from padding and deletion nodes ++ * ++ * UBIFS journal replay must compare node sequence numbers, which means it must ++ * build a tree of node information to insert into the TNC. ++ */ ++struct replay_entry { ++ int lnum; ++ int offs; ++ int len; ++ unsigned long long sqnum; ++ int flags; ++ struct rb_node rb; ++ union ubifs_key key; ++ union { /* the members overlap; each insert helper fills exactly one of them */ ++ struct qstr nm; /* filled by insert_dent() */ ++ struct { /* filled by insert_node() */ ++ loff_t old_size; ++ loff_t new_size; ++ }; ++ struct { /* filled by insert_ref_node() */ ++ int free; ++ int dirty; ++ }; ++ }; ++}; ++ ++/** ++ * struct bud_entry - entry in the list of buds to replay. ++ * @list: next bud in the list ++ * @bud: bud description object ++ * @free: free bytes in the bud ++ * @sqnum: reference node sequence number ++ */ ++struct bud_entry { ++ struct list_head list; ++ struct ubifs_bud *bud; ++ int free; ++ unsigned long long sqnum; ++}; ++ ++/** ++ * set_bud_lprops - set free and dirty space used by a bud. ++ * @c: UBIFS file-system description object ++ * @r: replay entry of bud (its @lnum, @offs, @free and @dirty members are used) ++ */ ++static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r) ++{ ++ const struct ubifs_lprops *lp; ++ int err = 0, dirty; ++ ++ ubifs_get_lprops(c); ++ ++ lp = ubifs_lpt_lookup_dirty(c, r->lnum); ++ if (IS_ERR(lp)) { ++ err = PTR_ERR(lp); ++ goto out; ++ } ++ ++ dirty = lp->dirty; ++ if (r->offs == 0 && (lp->free != c->leb_size || lp->dirty != 0)) { ++ /* ++ * The LEB was added to the journal with a starting offset of ++ * zero which means the LEB must have been empty. The LEB ++ * property values should be lp->free == c->leb_size and ++ * lp->dirty == 0, but that is not the case.
The reason is that ++ * the LEB was garbage collected. The garbage collector resets ++ * the free and dirty space without recording it anywhere except ++ * lprops, so if there is not a commit then lprops does not have ++ * that information next time the file system is mounted. ++ * ++ * We do not need to adjust free space because the scan has told ++ * us the exact value which is recorded in the replay entry as ++ * r->free. ++ * ++ * However we do need to subtract from the dirty space the ++ * amount of space that the garbage collector reclaimed, which ++ * is the whole LEB minus the amount of space that was free. ++ */ ++ dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, ++ lp->free, lp->dirty); ++ dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, ++ lp->free, lp->dirty); ++ dirty -= c->leb_size - lp->free; ++ /* ++ * If the replay order was perfect the dirty space would now be ++ * zero. The order is not perfect because the journal heads ++ * race with each other. This is not a problem but it does mean ++ * that the dirty space may temporarily exceed c->leb_size ++ * during the replay. ++ */ ++ if (dirty != 0) ++ dbg_msg("LEB %d lp: %d free %d dirty " ++ "replay: %d free %d dirty", r->lnum, lp->free, ++ lp->dirty, r->free, r->dirty); ++ } ++ lp = ubifs_change_lp(c, lp, r->free, dirty + r->dirty, ++ lp->flags | LPROPS_TAKEN, 0); ++ if (IS_ERR(lp)) { ++ err = PTR_ERR(lp); ++ goto out; ++ } ++out: ++ ubifs_release_lprops(c); ++ return err; /* zero, or the PTR_ERR() of the failed lprops lookup or change */ ++} ++ ++/** ++ * trun_remove_range - apply a replay entry for a truncation to the TNC.
++ * @c: UBIFS file-system description object ++ * @r: replay entry of truncation ++ */ ++static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r) ++{ ++ unsigned min_blk, max_blk; ++ union ubifs_key min_key, max_key; ++ ino_t ino; ++ ++ min_blk = r->new_size / UBIFS_BLOCK_SIZE; ++ if (r->new_size & (UBIFS_BLOCK_SIZE - 1)) ++ min_blk += 1; /* new size ends inside a block: that block is kept */ ++ ++ max_blk = r->old_size / UBIFS_BLOCK_SIZE; ++ if ((r->old_size & (UBIFS_BLOCK_SIZE - 1)) == 0) ++ max_blk -= 1; /* old size is block-aligned: the last block is the previous one */ ++ ++ ino = key_inum(c, &r->key); ++ ++ data_key_init(c, &min_key, ino, min_blk); ++ data_key_init(c, &max_key, ino, max_blk); ++ ++ return ubifs_tnc_remove_range(c, &min_key, &max_key); ++} ++ ++/** ++ * apply_replay_entry - apply a replay entry to the TNC. ++ * @c: UBIFS file-system description object ++ * @r: replay entry to apply ++ * ++ * Apply a replay entry to the TNC. %REPLAY_REF entries update bud lprops instead. ++ */ ++static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) ++{ ++ int err, deletion = ((r->flags & REPLAY_DELETION) != 0); ++ ++ dbg_mnt("LEB %d:%d len %d flgs %d sqnum %llu %s", r->lnum, ++ r->offs, r->len, r->flags, r->sqnum, DBGKEY(&r->key)); ++ ++ /* Set c->replay_sqnum to help deal with dangling branches.
*/ ++ c->replay_sqnum = r->sqnum; ++ ++ if (r->flags & REPLAY_REF) ++ err = set_bud_lprops(c, r); ++ else if (is_hash_key(c, &r->key)) { /* entry carries a name in r->nm */ ++ if (deletion) ++ err = ubifs_tnc_remove_nm(c, &r->key, &r->nm); ++ else ++ err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs, ++ r->len, &r->nm); ++ } else { ++ if (deletion) ++ switch (key_type(c, &r->key)) { ++ case UBIFS_INO_KEY: ++ { ++ ino_t inum = key_inum(c, &r->key); ++ ++ err = ubifs_tnc_remove_ino(c, inum); ++ break; ++ } ++ case UBIFS_TRUN_KEY: ++ err = trun_remove_range(c, r); ++ break; ++ default: ++ err = ubifs_tnc_remove(c, &r->key); ++ break; ++ } ++ else ++ err = ubifs_tnc_add(c, &r->key, r->lnum, r->offs, ++ r->len); ++ if (err) ++ return err; ++ ++ if (c->need_recovery) /* feed the size to recovery only when recovery is needed */ ++ err = ubifs_recover_size_accum(c, &r->key, deletion, ++ r->new_size); ++ } ++ ++ return err; ++} ++ ++/** ++ * destroy_replay_tree - destroy the replay tree. ++ * @c: UBIFS file-system description object ++ * ++ * Destroy the replay tree, freeing every entry and the name copy of hash-key entries. ++ */ ++static void destroy_replay_tree(struct ubifs_info *c) ++{ ++ struct rb_node *this = c->replay_tree.rb_node; ++ struct replay_entry *r; ++ ++ while (this) { ++ if (this->rb_left) { /* descend until a node without children is reached */ ++ this = this->rb_left; ++ continue; ++ } else if (this->rb_right) { ++ this = this->rb_right; ++ continue; ++ } ++ r = rb_entry(this, struct replay_entry, rb); ++ this = rb_parent(this); ++ if (this) { /* unlink the leaf from its parent before freeing it */ ++ if (this->rb_left == &r->rb) ++ this->rb_left = NULL; ++ else ++ this->rb_right = NULL; ++ } ++ if (is_hash_key(c, &r->key)) ++ kfree(r->nm.name); ++ kfree(r); ++ } ++ c->replay_tree = RB_ROOT; ++} ++ ++/** ++ * apply_replay_tree - apply the replay tree to the TNC. ++ * @c: UBIFS file-system description object ++ * ++ * Apply the replay tree. ++ * Returns zero in case of success and a negative error code in case of ++ * failure.
++ */ ++static int apply_replay_tree(struct ubifs_info *c) ++{ ++ struct rb_node *this = rb_first(&c->replay_tree); /* entry with the lowest sqnum */ ++ ++ while (this) { ++ struct replay_entry *r; ++ int err; ++ ++ cond_resched(); ++ ++ r = rb_entry(this, struct replay_entry, rb); ++ err = apply_replay_entry(c, r); ++ if (err) ++ return err; /* stop at the first failure; nothing is freed here */ ++ this = rb_next(this); /* next higher sqnum */ ++ } ++ return 0; ++} ++ ++/** ++ * insert_node - insert a node to the replay tree. ++ * @c: UBIFS file-system description object ++ * @lnum: node logical eraseblock number ++ * @offs: node offset ++ * @len: node length ++ * @key: node key ++ * @sqnum: sequence number ++ * @deletion: non-zero if this is a deletion ++ * @used: number of bytes in use in a LEB ++ * @old_size: truncation old size ++ * @new_size: truncation new size ++ * ++ * This function inserts a scanned non-direntry node to the replay tree. The ++ * replay tree is an RB-tree containing @struct replay_entry elements which are ++ * indexed by the sequence number. The replay tree is applied at the very end ++ * of the replay process. Since the tree is sorted in sequence number order, ++ * the older modifications are applied first. This function returns zero in ++ * case of success and a negative error code in case of failure.
++ */ ++static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, ++ union ubifs_key *key, unsigned long long sqnum, ++ int deletion, int *used, loff_t old_size, ++ loff_t new_size) ++{ ++ struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; ++ struct replay_entry *r; ++ ++ if (key_inum(c, key) >= c->highest_inum) /* track the highest inode number seen */ ++ c->highest_inum = key_inum(c, key); ++ ++ dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); ++ while (*p) { /* find the link for @sqnum; smaller numbers go left */ ++ parent = *p; ++ r = rb_entry(parent, struct replay_entry, rb); ++ if (sqnum < r->sqnum) { ++ p = &(*p)->rb_left; ++ continue; ++ } else if (sqnum > r->sqnum) { ++ p = &(*p)->rb_right; ++ continue; ++ } ++ ubifs_err("duplicate sqnum in replay"); ++ return -EINVAL; ++ } ++ ++ r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); ++ if (!r) ++ return -ENOMEM; ++ ++ if (!deletion) /* deletions add nothing to the used-space count */ ++ *used += ALIGN(len, 8); ++ r->lnum = lnum; ++ r->offs = offs; ++ r->len = len; ++ r->sqnum = sqnum; ++ r->flags = (deletion ? REPLAY_DELETION : 0); ++ r->old_size = old_size; ++ r->new_size = new_size; ++ key_copy(c, key, &r->key); ++ ++ rb_link_node(&r->rb, parent, p); ++ rb_insert_color(&r->rb, &c->replay_tree); ++ return 0; ++} ++ ++/** ++ * insert_dent - insert a directory entry node into the replay tree. ++ * @c: UBIFS file-system description object ++ * @lnum: node logical eraseblock number ++ * @offs: node offset ++ * @len: node length ++ * @key: node key ++ * @name: directory entry name ++ * @nlen: directory entry name length ++ * @sqnum: sequence number ++ * @deletion: non-zero if this is a deletion ++ * @used: number of bytes in use in a LEB ++ * ++ * This function inserts a scanned directory entry node to the replay tree. ++ * Returns zero in case of success and a negative error code in case of ++ * failure. ++ * ++ * This function is also used for extended attribute entries because they are ++ * implemented as directory entry nodes.
++ */ ++static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, ++ union ubifs_key *key, const char *name, int nlen, ++ unsigned long long sqnum, int deletion, int *used) ++{ ++ struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; ++ struct replay_entry *r; ++ char *nbuf; ++ ++ if (key_inum(c, key) >= c->highest_inum) /* track the highest inode number seen */ ++ c->highest_inum = key_inum(c, key); ++ ++ dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); ++ while (*p) { /* find the link for @sqnum; smaller numbers go left */ ++ parent = *p; ++ r = rb_entry(parent, struct replay_entry, rb); ++ if (sqnum < r->sqnum) { ++ p = &(*p)->rb_left; ++ continue; ++ } ++ if (sqnum > r->sqnum) { ++ p = &(*p)->rb_right; ++ continue; ++ } ++ ubifs_err("duplicate sqnum in replay"); ++ return -EINVAL; ++ } ++ ++ r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); ++ if (!r) ++ return -ENOMEM; ++ nbuf = kmalloc(nlen + 1, GFP_KERNEL); /* room for the name plus a terminating NUL */ ++ if (!nbuf) { ++ kfree(r); ++ return -ENOMEM; ++ } ++ ++ if (!deletion) /* deletions add nothing to the used-space count */ ++ *used += ALIGN(len, 8); ++ r->lnum = lnum; ++ r->offs = offs; ++ r->len = len; ++ r->sqnum = sqnum; ++ r->nm.len = nlen; ++ memcpy(nbuf, name, nlen); ++ nbuf[nlen] = '\0'; ++ r->nm.name = nbuf; /* private copy; released by destroy_replay_tree() */ ++ r->flags = (deletion ? REPLAY_DELETION : 0); ++ key_copy(c, key, &r->key); ++ ++ ubifs_assert(!*p); ++ rb_link_node(&r->rb, parent, p); ++ rb_insert_color(&r->rb, &c->replay_tree); ++ return 0; ++} ++ ++/** ++ * ubifs_validate_entry - validate directory or extended attribute entry node. ++ * @c: UBIFS file-system description object ++ * @dent: the node to validate ++ * ++ * This function validates directory or extended attribute entry node @dent. ++ * Returns zero if the node is all right and a %-EINVAL if not.
++ */ ++int ubifs_validate_entry(struct ubifs_info *c, ++ const struct ubifs_dent_node *dent) ++{ ++ int key_type = key_type_flash(c, dent->key); ++ int nlen = le16_to_cpu(dent->nlen); ++ ++ if (le32_to_cpu(dent->ch.len) != nlen + UBIFS_DENT_NODE_SZ + 1 || ++ dent->type >= UBIFS_ITYPES_CNT || ++ nlen > UBIFS_MAX_NLEN || dent->name[nlen] != 0 || ++ strnlen(dent->name, nlen) != nlen || ++ le64_to_cpu(dent->inum) > MAX_INUM) { ++ ubifs_err("bad %s node", key_type == UBIFS_DENT_KEY ? ++ "directory entry" : "extended attribute entry"); ++ return -EINVAL; ++ } ++ ++ if (key_type != UBIFS_DENT_KEY && key_type != UBIFS_XENT_KEY) { ++ ubifs_err("bad key type %d", key_type); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++/** ++ * replay_bud - replay a bud logical eraseblock. ++ * @c: UBIFS file-system description object ++ * @lnum: bud logical eraseblock number to replay ++ * @offs: bud start offset ++ * @jhead: journal head to which this bud belongs ++ * @free: amount of free space in the bud is returned here ++ * @dirty: amount of dirty space from padding and deletion nodes is returned ++ * here ++ * ++ * This function returns zero in case of success and a negative error code in ++ * case of failure. ++ */ ++static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, ++ int *free, int *dirty) ++{ ++ int err = 0, used = 0; ++ struct ubifs_scan_leb *sleb; ++ struct ubifs_scan_node *snod; ++ struct ubifs_bud *bud; ++ ++ dbg_mnt("replay bud LEB %d, head %d", lnum, jhead); ++ if (c->need_recovery) ++ sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD); ++ else ++ sleb = ubifs_scan(c, lnum, offs, c->sbuf); ++ if (IS_ERR(sleb)) ++ return PTR_ERR(sleb); ++ ++ /* ++ * The bud does not have to start from offset zero - the beginning of ++ * the 'lnum' LEB may contain previously committed data. One of the ++ * things we have to do in replay is to correctly update lprops with ++ * newer information about this LEB.
++ * ++ * At this point lprops thinks that this LEB has 'c->leb_size - offs' ++ * bytes of free space because it only contains information about ++ * committed data. ++ * ++ * But we know that real amount of free space is 'c->leb_size - ++ * sleb->endpt', and the space in the 'lnum' LEB between 'offs' and ++ * 'sleb->endpt' is used by bud data. We have to correctly calculate ++ * how much of these data are dirty and update lprops with this ++ * information. ++ * ++ * The dirt in that LEB region is comprised of padding nodes, deletion ++ * nodes, truncation nodes and nodes which are obsoleted by subsequent ++ * nodes in this LEB. So instead of calculating clean space, we ++ * calculate used space ('used' variable). ++ */ ++ ++ list_for_each_entry(snod, &sleb->nodes, list) { ++ int deletion = 0; ++ ++ cond_resched(); ++ ++ if (snod->sqnum >= SQNUM_WATERMARK) { ++ ubifs_err("file system's life ended"); ++ goto out_dump; ++ } ++ ++ if (snod->sqnum > c->max_sqnum) ++ c->max_sqnum = snod->sqnum; ++ ++ switch (snod->type) { ++ case UBIFS_INO_NODE: ++ { ++ struct ubifs_ino_node *ino = snod->node; ++ loff_t new_size = le64_to_cpu(ino->size); ++ ++ if (le32_to_cpu(ino->nlink) == 0) /* an inode with no links is replayed as a deletion */ ++ deletion = 1; ++ err = insert_node(c, lnum, snod->offs, snod->len, ++ &snod->key, snod->sqnum, deletion, ++ &used, 0, new_size); ++ break; ++ } ++ case UBIFS_DATA_NODE: ++ { ++ struct ubifs_data_node *dn = snod->node; ++ loff_t new_size = le32_to_cpu(dn->size) + ++ (loff_t)key_block(c, &snod->key) * ++ UBIFS_BLOCK_SIZE; /* widened before multiplying; assuming key_block() is 32-bit, offsets past 4 GiB would otherwise wrap */ ++ ++ err = insert_node(c, lnum, snod->offs, snod->len, ++ &snod->key, snod->sqnum, deletion, ++ &used, 0, new_size); ++ break; ++ } ++ case UBIFS_DENT_NODE: ++ case UBIFS_XENT_NODE: ++ { ++ struct ubifs_dent_node *dent = snod->node; ++ ++ err = ubifs_validate_entry(c, dent); ++ if (err) ++ goto out_dump; ++ ++ err = insert_dent(c, lnum, snod->offs, snod->len, ++ &snod->key, dent->name, ++ le16_to_cpu(dent->nlen), snod->sqnum, ++ !le64_to_cpu(dent->inum), &used); /* a zero target inode number marks a deletion */ ++ break; ++ } ++ case
UBIFS_TRUN_NODE: ++ { ++ struct ubifs_trun_node *trun = snod->node; ++ loff_t old_size = le64_to_cpu(trun->old_size); ++ loff_t new_size = le64_to_cpu(trun->new_size); ++ union ubifs_key key; ++ ++ /* Validate truncation node */ ++ if (old_size < 0 || old_size > c->max_inode_sz || ++ new_size < 0 || new_size > c->max_inode_sz || ++ old_size <= new_size) { ++ ubifs_err("bad truncation node"); ++ goto out_dump; ++ } ++ ++ /* ++ * Create a fake truncation key just to use the same ++ * functions which expect nodes to have keys. ++ */ ++ trun_key_init(c, &key, le32_to_cpu(trun->inum)); ++ err = insert_node(c, lnum, snod->offs, snod->len, ++ &key, snod->sqnum, 1, &used, ++ old_size, new_size); ++ break; ++ } ++ default: ++ ubifs_err("unexpected node type %d in bud LEB %d:%d", ++ snod->type, lnum, snod->offs); ++ err = -EINVAL; ++ goto out_dump; ++ } ++ if (err) ++ goto out; ++ } ++ ++ bud = ubifs_search_bud(c, lnum); ++ if (!bud) ++ BUG(); ++ ++ ubifs_assert(sleb->endpt - offs >= used); ++ ubifs_assert(sleb->endpt % c->min_io_size == 0); ++ ++ if (sleb->endpt + c->min_io_size <= c->leb_size && ++ !(c->vfs_sb->s_flags & MS_RDONLY)) ++ err = ubifs_wbuf_seek_nolock(&c->jheads[jhead].wbuf, lnum, ++ sleb->endpt, UBI_SHORTTERM); ++ ++ *dirty = sleb->endpt - offs - used; /* scanned bud bytes not counted as used */ ++ *free = c->leb_size - sleb->endpt; ++ ++out: ++ ubifs_scan_destroy(sleb); ++ return err; ++ ++out_dump: ++ ubifs_err("bad node is at LEB %d:%d", lnum, snod->offs); ++ dbg_dump_node(c, snod->node); ++ ubifs_scan_destroy(sleb); ++ return -EINVAL; ++} ++ ++/** ++ * insert_ref_node - insert a reference node to the replay tree.
++ * @c: UBIFS file-system description object ++ * @lnum: node logical eraseblock number ++ * @offs: node offset ++ * @sqnum: sequence number ++ * @free: amount of free space in bud ++ * @dirty: amount of dirty space from padding and deletion nodes ++ * ++ * This function inserts a reference node to the replay tree and returns zero ++ * in case of success or a negative error code in case of failure. ++ */ ++static int insert_ref_node(struct ubifs_info *c, int lnum, int offs, ++ unsigned long long sqnum, int free, int dirty) ++{ ++ struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; ++ struct replay_entry *r; ++ ++ dbg_mnt("add ref LEB %d:%d", lnum, offs); ++ while (*p) { /* find the link for @sqnum; smaller numbers go left */ ++ parent = *p; ++ r = rb_entry(parent, struct replay_entry, rb); ++ if (sqnum < r->sqnum) { ++ p = &(*p)->rb_left; ++ continue; ++ } else if (sqnum > r->sqnum) { ++ p = &(*p)->rb_right; ++ continue; ++ } ++ ubifs_err("duplicate sqnum in replay tree"); ++ return -EINVAL; ++ } ++ ++ r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); ++ if (!r) ++ return -ENOMEM; ++ ++ r->lnum = lnum; ++ r->offs = offs; ++ r->sqnum = sqnum; ++ r->flags = REPLAY_REF; /* routes the entry to set_bud_lprops() when it is applied */ ++ r->free = free; ++ r->dirty = dirty; ++ ++ rb_link_node(&r->rb, parent, p); ++ rb_insert_color(&r->rb, &c->replay_tree); ++ return 0; ++} ++ ++/** ++ * replay_buds - replay all buds. ++ * @c: UBIFS file-system description object ++ * ++ * This function returns zero in case of success and a negative error code in ++ * case of failure. ++ */ ++static int replay_buds(struct ubifs_info *c) ++{ ++ struct bud_entry *b; ++ int err, uninitialized_var(free), uninitialized_var(dirty); ++ ++ list_for_each_entry(b, &c->replay_buds, list) { ++ err = replay_bud(c, b->bud->lnum, b->bud->start, b->bud->jhead, ++ &free, &dirty); ++ if (err) ++ return err; ++ err = insert_ref_node(c, b->bud->lnum, b->bud->start, b->sqnum, ++ free, dirty); /* keyed by the reference node's sqnum */ ++ if (err) ++ return err; ++ } ++ ++ return 0; ++} ++ ++/** ++ * destroy_bud_list - destroy the list of buds to replay.
++ * @c: UBIFS file-system description object ++ */ ++static void destroy_bud_list(struct ubifs_info *c) ++{ ++ struct bud_entry *b; ++ ++ while (!list_empty(&c->replay_buds)) { ++ b = list_entry(c->replay_buds.next, struct bud_entry, list); ++ list_del(&b->list); ++ kfree(b); /* only the list entry is freed; b->bud is not freed here */ ++ } ++} ++ ++/** ++ * add_replay_bud - add a bud to the list of buds to replay. ++ * @c: UBIFS file-system description object ++ * @lnum: bud logical eraseblock number to replay ++ * @offs: bud start offset ++ * @jhead: journal head to which this bud belongs ++ * @sqnum: reference node sequence number ++ * ++ * This function returns zero in case of success and a negative error code in ++ * case of failure. ++ */ ++static int add_replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, ++ unsigned long long sqnum) ++{ ++ struct ubifs_bud *bud; ++ struct bud_entry *b; ++ ++ dbg_mnt("add replay bud LEB %d:%d, head %d", lnum, offs, jhead); ++ ++ bud = kmalloc(sizeof(struct ubifs_bud), GFP_KERNEL); ++ if (!bud) ++ return -ENOMEM; ++ ++ b = kmalloc(sizeof(struct bud_entry), GFP_KERNEL); ++ if (!b) { ++ kfree(bud); ++ return -ENOMEM; ++ } ++ ++ bud->lnum = lnum; ++ bud->start = offs; ++ bud->jhead = jhead; ++ ubifs_add_bud(c, bud); ++ ++ b->bud = bud; ++ b->sqnum = sqnum; /* b->free is not set here */ ++ list_add_tail(&b->list, &c->replay_buds); ++ ++ return 0; ++} ++ ++/** ++ * validate_ref - validate a reference node. ++ * @c: UBIFS file-system description object ++ * @ref: the reference node to validate ++ * ++ * The LEB number, offset and journal head are all read from @ref. ++ * ++ * This function returns %1 if a bud reference already exists for the LEB. %0 is ++ * returned if the reference node is new, otherwise %-EINVAL is returned if ++ * validation failed.
++ */ ++static int validate_ref(struct ubifs_info *c, const struct ubifs_ref_node *ref) ++{ ++ struct ubifs_bud *bud; ++ int lnum = le32_to_cpu(ref->lnum); ++ unsigned int offs = le32_to_cpu(ref->offs); ++ unsigned int jhead = le32_to_cpu(ref->jhead); ++ ++ /* ++ * ref->offs may point to the end of LEB when the journal head points ++ * to the end of LEB and we write reference node for it during commit. ++ * So this is why only 'offs > c->leb_size' is rejected, not 'offs == c->leb_size'. ++ */ ++ if (jhead >= c->jhead_cnt || lnum >= c->leb_cnt || ++ lnum < c->main_first || offs > c->leb_size || ++ offs & (c->min_io_size - 1)) ++ return -EINVAL; ++ ++ /* Make sure we have not already looked at this bud */ ++ bud = ubifs_search_bud(c, lnum); ++ if (bud) { ++ if (bud->jhead == jhead && bud->start <= offs) ++ return 1; ++ ubifs_err("bud at LEB %d:%d was already referred", lnum, offs); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++/** ++ * replay_log_leb - replay a log logical eraseblock. ++ * @c: UBIFS file-system description object ++ * @lnum: log logical eraseblock to replay ++ * @offs: offset to start replaying from ++ * @sbuf: scan buffer ++ * ++ * This function replays a log LEB and returns zero in case of success, %1 if ++ * this is the last LEB in the log, and a negative error code in case of ++ * failure.
++ */ ++static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf) ++{ ++ int err; ++ struct ubifs_scan_leb *sleb; ++ struct ubifs_scan_node *snod; ++ const struct ubifs_cs_node *node; ++ ++ dbg_mnt("replay log LEB %d:%d", lnum, offs); ++ sleb = ubifs_scan(c, lnum, offs, sbuf); ++ if (IS_ERR(sleb)) { ++ if (c->need_recovery) ++ sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf); ++ if (IS_ERR(sleb)) ++ return PTR_ERR(sleb); ++ } ++ ++ if (sleb->nodes_cnt == 0) { ++ err = 1; ++ goto out; ++ } ++ ++ node = sleb->buf; ++ ++ snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list); ++ if (c->cs_sqnum == 0) { ++ /* ++ * This is the first log LEB we are looking at, make sure that ++ * the first node is a commit start node. Also record its ++ * sequence number so that UBIFS can determine where the log ++ * ends, because all nodes which were have higher sequence ++ * numbers. ++ */ ++ if (snod->type != UBIFS_CS_NODE) { ++ dbg_err("first log node at LEB %d:%d is not CS node", ++ lnum, offs); ++ goto out_dump; ++ } ++ if (le64_to_cpu(node->cmt_no) != c->cmt_no) { ++ dbg_err("first CS node at LEB %d:%d has wrong " ++ "commit number %llu expected %llu", ++ lnum, offs, ++ (unsigned long long)le64_to_cpu(node->cmt_no), ++ c->cmt_no); ++ goto out_dump; ++ } ++ ++ c->cs_sqnum = le64_to_cpu(node->ch.sqnum); ++ dbg_mnt("commit start sqnum %llu", c->cs_sqnum); ++ } ++ ++ if (snod->sqnum < c->cs_sqnum) { ++ /* ++ * This means that we reached end of log and now ++ * look to the older log data, which was already ++ * committed but the eraseblock was not erased (UBIFS ++ * only un-maps it). So this basically means we have to ++ * exit with "end of log" code. 
++ */ ++ err = 1; ++ goto out; ++ } ++ ++ /* Make sure the first node sits at offset zero of the LEB */ ++ if (snod->offs != 0) { ++ dbg_err("first node is not at zero offset"); ++ goto out_dump; ++ } ++ ++ list_for_each_entry(snod, &sleb->nodes, list) { ++ ++ cond_resched(); ++ ++ if (snod->sqnum >= SQNUM_WATERMARK) { ++ ubifs_err("file system's life ended"); ++ goto out_dump; ++ } ++ ++ if (snod->sqnum < c->cs_sqnum) { ++ dbg_err("bad sqnum %llu, commit sqnum %llu", ++ snod->sqnum, c->cs_sqnum); ++ goto out_dump; ++ } ++ ++ if (snod->sqnum > c->max_sqnum) ++ c->max_sqnum = snod->sqnum; ++ ++ switch (snod->type) { ++ case UBIFS_REF_NODE: { ++ const struct ubifs_ref_node *ref = snod->node; ++ ++ err = validate_ref(c, ref); ++ if (err == 1) ++ break; /* Already have this bud */ ++ if (err) ++ goto out_dump; ++ ++ err = add_replay_bud(c, le32_to_cpu(ref->lnum), ++ le32_to_cpu(ref->offs), ++ le32_to_cpu(ref->jhead), ++ snod->sqnum); ++ if (err) ++ goto out; ++ ++ break; ++ } ++ case UBIFS_CS_NODE: ++ /* Make sure it sits at the beginning of LEB */ ++ if (snod->offs != 0) { ++ ubifs_err("unexpected node in log"); ++ goto out_dump; ++ } ++ break; ++ default: ++ ubifs_err("unexpected node in log"); ++ goto out_dump; ++ } ++ } ++ ++ if (sleb->endpt || c->lhead_offs >= c->leb_size) { ++ c->lhead_lnum = lnum; ++ c->lhead_offs = sleb->endpt; ++ } ++ ++ err = !sleb->endpt; ++out: ++ ubifs_scan_destroy(sleb); ++ return err; ++ ++out_dump: ++ ubifs_err("log error detected while replying the log at LEB %d:%d", ++ lnum, offs + snod->offs); ++ dbg_dump_node(c, snod->node); ++ ubifs_scan_destroy(sleb); ++ return -EINVAL; ++} ++ ++/** ++ * take_ihead - update the status of the index head in lprops to 'taken'. ++ * @c: UBIFS file-system description object ++ * ++ * This function returns the amount of free space in the index head LEB or a ++ * negative error code. 
++ */ ++static int take_ihead(struct ubifs_info *c) ++{ ++ const struct ubifs_lprops *lp; ++ int err, free; ++ ++ ubifs_get_lprops(c); ++ ++ lp = ubifs_lpt_lookup_dirty(c, c->ihead_lnum); ++ if (IS_ERR(lp)) { ++ err = PTR_ERR(lp); ++ goto out; ++ } ++ ++ free = lp->free; /* read before ubifs_change_lp() replaces lp */ ++ ++ lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC, ++ lp->flags | LPROPS_TAKEN, 0); ++ if (IS_ERR(lp)) { ++ err = PTR_ERR(lp); ++ goto out; ++ } ++ ++ err = free; /* success: the return value carries the free space */ ++out: ++ ubifs_release_lprops(c); ++ return err; ++} ++ ++/** ++ * ubifs_replay_journal - replay journal. ++ * @c: UBIFS file-system description object ++ * ++ * This function scans the journal, replays and cleans it up. It makes sure all ++ * memory data structures related to uncommitted journal are built (dirty TNC ++ * tree, tree of buds, modified lprops, etc). ++ */ ++int ubifs_replay_journal(struct ubifs_info *c) ++{ ++ int err, i, lnum, offs, free; ++ void *sbuf = NULL; ++ ++ BUILD_BUG_ON(UBIFS_TRUN_KEY > 5); ++ ++ /* Update the status of the index head in lprops to 'taken' */ ++ free = take_ihead(c); ++ if (free < 0) ++ return free; /* Error code */ ++ ++ if (c->ihead_offs != c->leb_size - free) { ++ ubifs_err("bad index head LEB %d:%d", c->ihead_lnum, ++ c->ihead_offs); ++ return -EINVAL; ++ } ++ ++ sbuf = vmalloc(c->leb_size); /* scan buffer handed to replay_log_leb() */ ++ if (!sbuf) ++ return -ENOMEM; ++ ++ dbg_mnt("start replaying the journal"); ++ ++ c->replaying = 1; ++ ++ lnum = c->ltail_lnum = c->lhead_lnum; ++ offs = c->lhead_offs; ++ ++ for (i = 0; i < c->log_lebs; i++, lnum++) { ++ if (lnum >= UBIFS_LOG_LNUM + c->log_lebs) { ++ /* ++ * The log is logically circular, we reached the last ++ * LEB, switch to the first one.
++ */ ++ lnum = UBIFS_LOG_LNUM; ++ offs = 0; ++ } ++ err = replay_log_leb(c, lnum, offs, sbuf); ++ if (err == 1) ++ /* We hit the end of the log */ ++ break; ++ if (err) ++ goto out; ++ offs = 0; /* every LEB after the first is replayed from offset zero */ ++ } ++ ++ err = replay_buds(c); ++ if (err) ++ goto out; ++ ++ err = apply_replay_tree(c); ++ if (err) ++ goto out; ++ ++ /* ++ * UBIFS budgeting calculations use @c->budg_uncommitted_idx variable ++ * to roughly estimate index growth. Things like @c->min_idx_lebs ++ * depend on it. This means we have to initialize it to make sure ++ * budgeting works properly. ++ */ ++ c->budg_uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt); ++ c->budg_uncommitted_idx *= c->max_idx_node_sz; ++ ++ ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery); ++ dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, " ++ "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum, ++ (unsigned long)c->highest_inum); ++out: /* reached on success and on failure alike: the replay-only structures are always released */ ++ destroy_replay_tree(c); ++ destroy_bud_list(c); ++ vfree(sbuf); ++ c->replaying = 0; ++ return err; ++} +diff -Nurd linux-2.6.24/fs/ubifs/sb.c ubifs-v2.6.24/fs/ubifs/sb.c +--- linux-2.6.24/fs/ubifs/sb.c 1970-01-01 02:00:00.000000000 +0200 ++++ ubifs-v2.6.24/fs/ubifs/sb.c 2009-04-07 17:14:47.000000000 +0200 +@@ -0,0 +1,656 @@ ++/* ++ * This file is part of UBIFS. ++ * ++ * Copyright (C) 2006-2008 Nokia Corporation. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published by ++ * the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details.
++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, write to the Free Software Foundation, Inc., 51 ++ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ * ++ * Authors: Artem Bityutskiy (Битюцкий Артём) ++ * Adrian Hunter ++ */ ++ ++/* ++ * This file implements UBIFS superblock. The superblock is stored at the first ++ * LEB of the volume and is never changed by UBIFS. Only user-space tools may ++ * change it. The superblock node mostly contains geometry information. ++ */ ++ ++#include "ubifs.h" ++#include <linux/random.h> ++ ++/* ++ * Default journal size in logical eraseblocks as a percent of total ++ * flash size. ++ */ ++#define DEFAULT_JNL_PERCENT 5 ++ ++/* Default maximum journal size in bytes */ ++#define DEFAULT_MAX_JNL (32*1024*1024) ++ ++/* Default indexing tree fanout */ ++#define DEFAULT_FANOUT 8 ++ ++/* Default number of data journal heads */ ++#define DEFAULT_JHEADS_CNT 1 ++ ++/* Default positions of different LEBs in the main area */ ++#define DEFAULT_IDX_LEB 0 ++#define DEFAULT_DATA_LEB 1 ++#define DEFAULT_GC_LEB 2 ++ ++/* Default number of LEB numbers in LPT's save table */ ++#define DEFAULT_LSAVE_CNT 256 ++ ++/* Default reserved pool size as a percent of maximum free space */ ++#define DEFAULT_RP_PERCENT 5 ++ ++/* The default maximum size of reserved pool in bytes */ ++#define DEFAULT_MAX_RP_SIZE (5*1024*1024) ++ ++/* Default UBIFS compressor */ ++#define DEFAULT_COMPRESSOR UBIFS_COMPR_LZO ++ ++/* Default time granularity in nanoseconds */ ++#define DEFAULT_TIME_GRAN 1000000000 ++ ++/** ++ * create_default_filesystem - format empty UBI volume. ++ * @c: UBIFS file-system description object ++ * ++ * This function creates default empty file-system. Returns zero in case of ++ * success and a negative error code in case of failure. 
++ */ ++static int create_default_filesystem(struct ubifs_info *c) ++{ ++ struct ubifs_sb_node *sup; ++ struct ubifs_mst_node *mst; ++ struct ubifs_idx_node *idx; ++ struct ubifs_branch *br; ++ struct ubifs_ino_node *ino; ++ struct ubifs_cs_node *cs; ++ union ubifs_key key; ++ int err, tmp, jnl_lebs, log_lebs, max_buds, main_lebs, main_first; ++ int lpt_lebs, lpt_first, orph_lebs, big_lpt, ino_waste, sup_flags = 0; ++ int min_leb_cnt = UBIFS_MIN_LEB_CNT; ++ long long tmp64, main_bytes; ++ __le64 tmp_le64; ++ ++ /* Some functions called from here depend on the @c->key_len filed */ ++ c->key_len = UBIFS_SK_LEN; ++ ++ /* ++ * First of all, we have to calculate default file-system geometry - ++ * log size, journal size, etc. ++ */ ++ if (c->leb_cnt < 0x7FFFFFFF / DEFAULT_JNL_PERCENT) ++ /* We can first multiply then divide and have no overflow */ ++ jnl_lebs = c->leb_cnt * DEFAULT_JNL_PERCENT / 100; ++ else ++ jnl_lebs = (c->leb_cnt / 100) * DEFAULT_JNL_PERCENT; ++ ++ if (jnl_lebs < UBIFS_MIN_JNL_LEBS) ++ jnl_lebs = UBIFS_MIN_JNL_LEBS; ++ if (jnl_lebs * c->leb_size > DEFAULT_MAX_JNL) ++ jnl_lebs = DEFAULT_MAX_JNL / c->leb_size; ++ ++ /* ++ * The log should be large enough to fit reference nodes for all bud ++ * LEBs. Because buds do not have to start from the beginning of LEBs ++ * (half of the LEB may contain committed data), the log should ++ * generally be larger, make it twice as large. ++ */ ++ tmp = 2 * (c->ref_node_alsz * jnl_lebs) + c->leb_size - 1; ++ log_lebs = tmp / c->leb_size; ++ /* Plus one LEB reserved for commit */ ++ log_lebs += 1; ++ if (c->leb_cnt - min_leb_cnt > 8) { ++ /* And some extra space to allow writes while committing */ ++ log_lebs += 1; ++ min_leb_cnt += 1; ++ } ++ ++ max_buds = jnl_lebs - log_lebs; ++ if (max_buds < UBIFS_MIN_BUD_LEBS) ++ max_buds = UBIFS_MIN_BUD_LEBS; ++ ++ /* ++ * Orphan nodes are stored in a separate area. 
One node can store a lot ++ * of orphan inode numbers, but when new orphan comes we just add a new ++ * orphan node. At some point the nodes are consolidated into one ++ * orphan node. ++ */ ++ orph_lebs = UBIFS_MIN_ORPH_LEBS; ++#ifdef CONFIG_UBIFS_FS_DEBUG ++ if (c->leb_cnt - min_leb_cnt > 1) ++ /* ++ * For debugging purposes it is better to have at least 2 ++ * orphan LEBs, because the orphan subsystem would need to do ++ * consolidations and would be stressed more. ++ */ ++ orph_lebs += 1; ++#endif ++ ++ main_lebs = c->leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS - log_lebs; ++ main_lebs -= orph_lebs; ++ ++ lpt_first = UBIFS_LOG_LNUM + log_lebs; ++ c->lsave_cnt = DEFAULT_LSAVE_CNT; ++ c->max_leb_cnt = c->leb_cnt; ++ err = ubifs_create_dflt_lpt(c, &main_lebs, lpt_first, &lpt_lebs, ++ &big_lpt); ++ if (err) ++ return err; ++ ++ dbg_gen("LEB Properties Tree created (LEBs %d-%d)", lpt_first, ++ lpt_first + lpt_lebs - 1); ++ ++ main_first = c->leb_cnt - main_lebs; ++ ++ /* Create default superblock */ ++ tmp = ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size); ++ sup = kzalloc(tmp, GFP_KERNEL); ++ if (!sup) ++ return -ENOMEM; ++ ++ tmp64 = (long long)max_buds * c->leb_size; ++ if (big_lpt) ++ sup_flags |= UBIFS_FLG_BIGLPT; ++ ++ sup->ch.node_type = UBIFS_SB_NODE; ++ sup->key_hash = UBIFS_KEY_HASH_R5; ++ sup->flags = cpu_to_le32(sup_flags); ++ sup->min_io_size = cpu_to_le32(c->min_io_size); ++ sup->leb_size = cpu_to_le32(c->leb_size); ++ sup->leb_cnt = cpu_to_le32(c->leb_cnt); ++ sup->max_leb_cnt = cpu_to_le32(c->max_leb_cnt); ++ sup->max_bud_bytes = cpu_to_le64(tmp64); ++ sup->log_lebs = cpu_to_le32(log_lebs); ++ sup->lpt_lebs = cpu_to_le32(lpt_lebs); ++ sup->orph_lebs = cpu_to_le32(orph_lebs); ++ sup->jhead_cnt = cpu_to_le32(DEFAULT_JHEADS_CNT); ++ sup->fanout = cpu_to_le32(DEFAULT_FANOUT); ++ sup->lsave_cnt = cpu_to_le32(c->lsave_cnt); ++ sup->fmt_version = cpu_to_le32(UBIFS_FORMAT_VERSION); ++ sup->time_gran = cpu_to_le32(DEFAULT_TIME_GRAN); ++ if (c->mount_opts.override_compr) 
++ sup->default_compr = cpu_to_le16(c->mount_opts.compr_type); ++ else ++ sup->default_compr = cpu_to_le16(UBIFS_COMPR_LZO); ++ ++ generate_random_uuid(sup->uuid); ++ ++ main_bytes = (long long)main_lebs * c->leb_size; ++ tmp64 = div_u64(main_bytes * DEFAULT_RP_PERCENT, 100); ++ if (tmp64 > DEFAULT_MAX_RP_SIZE) ++ tmp64 = DEFAULT_MAX_RP_SIZE; ++ sup->rp_size = cpu_to_le64(tmp64); ++ sup->ro_compat_version = cpu_to_le32(UBIFS_RO_COMPAT_VERSION); ++ ++ err = ubifs_write_node(c, sup, UBIFS_SB_NODE_SZ, 0, 0, UBI_LONGTERM); ++ kfree(sup); ++ if (err) ++ return err; ++ ++ dbg_gen("default superblock created at LEB 0:0"); ++ ++ /* Create default master node */ ++ mst = kzalloc(c->mst_node_alsz, GFP_KERNEL); ++ if (!mst) ++ return -ENOMEM; ++ ++ mst->ch.node_type = UBIFS_MST_NODE; ++ mst->log_lnum = cpu_to_le32(UBIFS_LOG_LNUM); ++ mst->highest_inum = cpu_to_le64(UBIFS_FIRST_INO); ++ mst->cmt_no = 0; ++ mst->root_lnum = cpu_to_le32(main_first + DEFAULT_IDX_LEB); ++ mst->root_offs = 0; ++ tmp = ubifs_idx_node_sz(c, 1); ++ mst->root_len = cpu_to_le32(tmp); ++ mst->gc_lnum = cpu_to_le32(main_first + DEFAULT_GC_LEB); ++ mst->ihead_lnum = cpu_to_le32(main_first + DEFAULT_IDX_LEB); ++ mst->ihead_offs = cpu_to_le32(ALIGN(tmp, c->min_io_size)); ++ mst->index_size = cpu_to_le64(ALIGN(tmp, 8)); ++ mst->lpt_lnum = cpu_to_le32(c->lpt_lnum); ++ mst->lpt_offs = cpu_to_le32(c->lpt_offs); ++ mst->nhead_lnum = cpu_to_le32(c->nhead_lnum); ++ mst->nhead_offs = cpu_to_le32(c->nhead_offs); ++ mst->ltab_lnum = cpu_to_le32(c->ltab_lnum); ++ mst->ltab_offs = cpu_to_le32(c->ltab_offs); ++ mst->lsave_lnum = cpu_to_le32(c->lsave_lnum); ++ mst->lsave_offs = cpu_to_le32(c->lsave_offs); ++ mst->lscan_lnum = cpu_to_le32(main_first); ++ mst->empty_lebs = cpu_to_le32(main_lebs - 2); ++ mst->idx_lebs = cpu_to_le32(1); ++ mst->leb_cnt = cpu_to_le32(c->leb_cnt); ++ ++ /* Calculate lprops statistics */ ++ tmp64 = main_bytes; ++ tmp64 -= ALIGN(ubifs_idx_node_sz(c, 1), c->min_io_size); ++ tmp64 -= 
ALIGN(UBIFS_INO_NODE_SZ, c->min_io_size); ++ mst->total_free = cpu_to_le64(tmp64); ++ ++ tmp64 = ALIGN(ubifs_idx_node_sz(c, 1), c->min_io_size); ++ ino_waste = ALIGN(UBIFS_INO_NODE_SZ, c->min_io_size) - ++ UBIFS_INO_NODE_SZ; ++ tmp64 += ino_waste; ++ tmp64 -= ALIGN(ubifs_idx_node_sz(c, 1), 8); ++ mst->total_dirty = cpu_to_le64(tmp64); ++ ++ /* The indexing LEB does not contribute to dark space */ ++ tmp64 = (c->main_lebs - 1) * c->dark_wm; ++ mst->total_dark = cpu_to_le64(tmp64); ++ ++ mst->total_used = cpu_to_le64(UBIFS_INO_NODE_SZ); ++ ++ err = ubifs_write_node(c, mst, UBIFS_MST_NODE_SZ, UBIFS_MST_LNUM, 0, ++ UBI_UNKNOWN); ++ if (err) { ++ kfree(mst); ++ return err; ++ } ++ err = ubifs_write_node(c, mst, UBIFS_MST_NODE_SZ, UBIFS_MST_LNUM + 1, 0, ++ UBI_UNKNOWN); ++ kfree(mst); ++ if (err) ++ return err; ++ ++ dbg_gen("default master node created at LEB %d:0", UBIFS_MST_LNUM); ++ ++ /* Create the root indexing node */ ++ tmp = ubifs_idx_node_sz(c, 1); ++ idx = kzalloc(ALIGN(tmp, c->min_io_size), GFP_KERNEL); ++ if (!idx) ++ return -ENOMEM; ++ ++ c->key_fmt = UBIFS_SIMPLE_KEY_FMT; ++ c->key_hash = key_r5_hash; ++ ++ idx->ch.node_type = UBIFS_IDX_NODE; ++ idx->child_cnt = cpu_to_le16(1); ++ ino_key_init(c, &key, UBIFS_ROOT_INO); ++ br = ubifs_idx_branch(c, idx, 0); ++ key_write_idx(c, &key, &br->key); ++ br->lnum = cpu_to_le32(main_first + DEFAULT_DATA_LEB); ++ br->len = cpu_to_le32(UBIFS_INO_NODE_SZ); ++ err = ubifs_write_node(c, idx, tmp, main_first + DEFAULT_IDX_LEB, 0, ++ UBI_UNKNOWN); ++ kfree(idx); ++ if (err) ++ return err; ++ ++ dbg_gen("default root indexing node created LEB %d:0", ++ main_first + DEFAULT_IDX_LEB); ++ ++ /* Create default root inode */ ++ tmp = ALIGN(UBIFS_INO_NODE_SZ, c->min_io_size); ++ ino = kzalloc(tmp, GFP_KERNEL); ++ if (!ino) ++ return -ENOMEM; ++ ++ ino_key_init_flash(c, &ino->key, UBIFS_ROOT_INO); ++ ino->ch.node_type = UBIFS_INO_NODE; ++ ino->creat_sqnum = cpu_to_le64(++c->max_sqnum); ++ ino->nlink = cpu_to_le32(2); ++ tmp_le64 = 
cpu_to_le64(CURRENT_TIME_SEC.tv_sec); ++ ino->atime_sec = tmp_le64; ++ ino->ctime_sec = tmp_le64; ++ ino->mtime_sec = tmp_le64; ++ ino->atime_nsec = 0; ++ ino->ctime_nsec = 0; ++ ino->mtime_nsec = 0; ++ ino->mode = cpu_to_le32(S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO); ++ ino->size = cpu_to_le64(UBIFS_INO_NODE_SZ); ++ ++ /* Set compression enabled by default */ ++ ino->flags = cpu_to_le32(UBIFS_COMPR_FL); ++ ++ err = ubifs_write_node(c, ino, UBIFS_INO_NODE_SZ, ++ main_first + DEFAULT_DATA_LEB, 0, ++ UBI_UNKNOWN); ++ kfree(ino); ++ if (err) ++ return err; ++ ++ dbg_gen("root inode created at LEB %d:0", ++ main_first + DEFAULT_DATA_LEB); ++ ++ /* ++ * The first node in the log has to be the commit start node. This is ++ * always the case during normal file-system operation. Write a fake ++ * commit start node to the log. ++ */ ++ tmp = ALIGN(UBIFS_CS_NODE_SZ, c->min_io_size); ++ cs = kzalloc(tmp, GFP_KERNEL); ++ if (!cs) ++ return -ENOMEM; ++ ++ cs->ch.node_type = UBIFS_CS_NODE; ++ err = ubifs_write_node(c, cs, UBIFS_CS_NODE_SZ, UBIFS_LOG_LNUM, ++ 0, UBI_UNKNOWN); ++ kfree(cs); ++ ++ ubifs_msg("default file-system created"); ++ return 0; ++} ++ ++/** ++ * validate_sb - validate superblock node. ++ * @c: UBIFS file-system description object ++ * @sup: superblock node ++ * ++ * This function validates superblock node @sup. Since most of data was read ++ * from the superblock and stored in @c, the function validates fields in @c ++ * instead. Returns zero in case of success and %-EINVAL in case of validation ++ * failure. ++ */ ++static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup) ++{ ++ long long max_bytes; ++ int err = 1, min_leb_cnt; ++ ++ if (!c->key_hash) { ++ err = 2; ++ goto failed; ++ } ++ ++ if (sup->key_fmt != UBIFS_SIMPLE_KEY_FMT) { ++ err = 3; ++ goto failed; ++ } ++ ++ if (le32_to_cpu(sup->min_io_size) != c->min_io_size) { ++ ubifs_err("min. 
I/O unit mismatch: %d in superblock, %d real", ++ le32_to_cpu(sup->min_io_size), c->min_io_size); ++ goto failed; ++ } ++ ++ if (le32_to_cpu(sup->leb_size) != c->leb_size) { ++ ubifs_err("LEB size mismatch: %d in superblock, %d real", ++ le32_to_cpu(sup->leb_size), c->leb_size); ++ goto failed; ++ } ++ ++ if (c->log_lebs < UBIFS_MIN_LOG_LEBS || ++ c->lpt_lebs < UBIFS_MIN_LPT_LEBS || ++ c->orph_lebs < UBIFS_MIN_ORPH_LEBS || ++ c->main_lebs < UBIFS_MIN_MAIN_LEBS) { ++ err = 4; ++ goto failed; ++ } ++ ++ /* ++ * Calculate minimum allowed amount of main area LEBs. This is very ++ * similar to %UBIFS_MIN_LEB_CNT, but we take into account real what we ++ * have just read from the superblock. ++ */ ++ min_leb_cnt = UBIFS_SB_LEBS + UBIFS_MST_LEBS + c->log_lebs; ++ min_leb_cnt += c->lpt_lebs + c->orph_lebs + c->jhead_cnt + 6; ++ ++ if (c->leb_cnt < min_leb_cnt || c->leb_cnt > c->vi.size) { ++ ubifs_err("bad LEB count: %d in superblock, %d on UBI volume, " ++ "%d minimum required", c->leb_cnt, c->vi.size, ++ min_leb_cnt); ++ goto failed; ++ } ++ ++ if (c->max_leb_cnt < c->leb_cnt) { ++ ubifs_err("max. 
LEB count %d less than LEB count %d", ++ c->max_leb_cnt, c->leb_cnt); ++ goto failed; ++ } ++ ++ if (c->main_lebs < UBIFS_MIN_MAIN_LEBS) { ++ err = 7; ++ goto failed; ++ } ++ ++ if (c->max_bud_bytes < (long long)c->leb_size * UBIFS_MIN_BUD_LEBS || ++ c->max_bud_bytes > (long long)c->leb_size * c->main_lebs) { ++ err = 8; ++ goto failed; ++ } ++ ++ if (c->jhead_cnt < NONDATA_JHEADS_CNT + 1 || ++ c->jhead_cnt > NONDATA_JHEADS_CNT + UBIFS_MAX_JHEADS) { ++ err = 9; ++ goto failed; ++ } ++ ++ if (c->fanout < UBIFS_MIN_FANOUT || ++ ubifs_idx_node_sz(c, c->fanout) > c->leb_size) { ++ err = 10; ++ goto failed; ++ } ++ ++ if (c->lsave_cnt < 0 || (c->lsave_cnt > DEFAULT_LSAVE_CNT && ++ c->lsave_cnt > c->max_leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS - ++ c->log_lebs - c->lpt_lebs - c->orph_lebs)) { ++ err = 11; ++ goto failed; ++ } ++ ++ if (UBIFS_SB_LEBS + UBIFS_MST_LEBS + c->log_lebs + c->lpt_lebs + ++ c->orph_lebs + c->main_lebs != c->leb_cnt) { ++ err = 12; ++ goto failed; ++ } ++ ++ if (c->default_compr < 0 || c->default_compr >= UBIFS_COMPR_TYPES_CNT) { ++ err = 13; ++ goto failed; ++ } ++ ++ max_bytes = c->main_lebs * (long long)c->leb_size; ++ if (c->rp_size < 0 || max_bytes < c->rp_size) { ++ err = 14; ++ goto failed; ++ } ++ ++ if (le32_to_cpu(sup->time_gran) > 1000000000 || ++ le32_to_cpu(sup->time_gran) < 1) { ++ err = 15; ++ goto failed; ++ } ++ ++ return 0; ++ ++failed: ++ ubifs_err("bad superblock, error %d", err); ++ dbg_dump_node(c, sup); ++ return -EINVAL; ++} ++ ++/** ++ * ubifs_read_sb_node - read superblock node. ++ * @c: UBIFS file-system description object ++ * ++ * This function returns a pointer to the superblock node or a negative error ++ * code. 
++ */ ++struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c) ++{ ++ struct ubifs_sb_node *sup; ++ int err; ++ ++ sup = kmalloc(ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size), GFP_NOFS); ++ if (!sup) ++ return ERR_PTR(-ENOMEM); ++ ++ err = ubifs_read_node(c, sup, UBIFS_SB_NODE, UBIFS_SB_NODE_SZ, ++ UBIFS_SB_LNUM, 0); ++ if (err) { ++ kfree(sup); ++ return ERR_PTR(err); ++ } ++ ++ return sup; ++} ++ ++/** ++ * ubifs_write_sb_node - write superblock node. ++ * @c: UBIFS file-system description object ++ * @sup: superblock node read with 'ubifs_read_sb_node()' ++ * ++ * This function returns %0 on success and a negative error code on failure. ++ */ ++int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup) ++{ ++ int len = ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size); ++ ++ ubifs_prepare_node(c, sup, UBIFS_SB_NODE_SZ, 1); ++ return ubifs_leb_change(c, UBIFS_SB_LNUM, sup, len, UBI_LONGTERM); ++} ++ ++/** ++ * ubifs_read_superblock - read superblock. ++ * @c: UBIFS file-system description object ++ * ++ * This function finds, reads and checks the superblock. If an empty UBI volume ++ * is being mounted, this function creates default superblock. Returns zero in ++ * case of success, and a negative error code in case of failure. ++ */ ++int ubifs_read_superblock(struct ubifs_info *c) ++{ ++ int err, sup_flags; ++ struct ubifs_sb_node *sup; ++ ++ if (c->empty) { ++ err = create_default_filesystem(c); ++ if (err) ++ return err; ++ } ++ ++ sup = ubifs_read_sb_node(c); ++ if (IS_ERR(sup)) ++ return PTR_ERR(sup); ++ ++ c->fmt_version = le32_to_cpu(sup->fmt_version); ++ c->ro_compat_version = le32_to_cpu(sup->ro_compat_version); ++ ++ /* ++ * The software supports all previous versions but not future versions, ++ * due to the unavailability of time-travelling equipment. 
++ */ ++ if (c->fmt_version > UBIFS_FORMAT_VERSION) { ++ struct super_block *sb = c->vfs_sb; ++ int mounting_ro = sb->s_flags & MS_RDONLY; ++ ++ ubifs_assert(!c->ro_media || mounting_ro); ++ if (!mounting_ro || ++ c->ro_compat_version > UBIFS_RO_COMPAT_VERSION) { ++ ubifs_err("on-flash format version is w%d/r%d, but " ++ "software only supports up to version " ++ "w%d/r%d", c->fmt_version, ++ c->ro_compat_version, UBIFS_FORMAT_VERSION, ++ UBIFS_RO_COMPAT_VERSION); ++ if (c->ro_compat_version <= UBIFS_RO_COMPAT_VERSION) { ++ ubifs_msg("only R/O mounting is possible"); ++ err = -EROFS; ++ } else ++ err = -EINVAL; ++ goto out; ++ } ++ ++ /* ++ * The FS is mounted R/O, and the media format is ++ * R/O-compatible with the UBIFS implementation, so we can ++ * mount. ++ */ ++ c->rw_incompat = 1; ++ } ++ ++ if (c->fmt_version < 3) { ++ ubifs_err("on-flash format version %d is not supported", ++ c->fmt_version); ++ err = -EINVAL; ++ goto out; ++ } ++ ++ switch (sup->key_hash) { ++ case UBIFS_KEY_HASH_R5: ++ c->key_hash = key_r5_hash; ++ c->key_hash_type = UBIFS_KEY_HASH_R5; ++ break; ++ ++ case UBIFS_KEY_HASH_TEST: ++ c->key_hash = key_test_hash; ++ c->key_hash_type = UBIFS_KEY_HASH_TEST; ++ break; ++ }; ++ ++ c->key_fmt = sup->key_fmt; ++ ++ switch (c->key_fmt) { ++ case UBIFS_SIMPLE_KEY_FMT: ++ c->key_len = UBIFS_SK_LEN; ++ break; ++ default: ++ ubifs_err("unsupported key format"); ++ err = -EINVAL; ++ goto out; ++ } ++ ++ c->leb_cnt = le32_to_cpu(sup->leb_cnt); ++ c->max_leb_cnt = le32_to_cpu(sup->max_leb_cnt); ++ c->max_bud_bytes = le64_to_cpu(sup->max_bud_bytes); ++ c->log_lebs = le32_to_cpu(sup->log_lebs); ++ c->lpt_lebs = le32_to_cpu(sup->lpt_lebs); ++ c->orph_lebs = le32_to_cpu(sup->orph_lebs); ++ c->jhead_cnt = le32_to_cpu(sup->jhead_cnt) + NONDATA_JHEADS_CNT; ++ c->fanout = le32_to_cpu(sup->fanout); ++ c->lsave_cnt = le32_to_cpu(sup->lsave_cnt); ++ c->rp_size = le64_to_cpu(sup->rp_size); ++ c->rp_uid = le32_to_cpu(sup->rp_uid); ++ c->rp_gid = 
le32_to_cpu(sup->rp_gid); ++ sup_flags = le32_to_cpu(sup->flags); ++ if (!c->mount_opts.override_compr) ++ c->default_compr = le16_to_cpu(sup->default_compr); ++ ++ c->vfs_sb->s_time_gran = le32_to_cpu(sup->time_gran); ++ memcpy(&c->uuid, &sup->uuid, 16); ++ c->big_lpt = !!(sup_flags & UBIFS_FLG_BIGLPT); ++ ++ /* Automatically increase file system size to the maximum size */ ++ c->old_leb_cnt = c->leb_cnt; ++ if (c->leb_cnt < c->vi.size && c->leb_cnt < c->max_leb_cnt) { ++ c->leb_cnt = min_t(int, c->max_leb_cnt, c->vi.size); ++ if (c->vfs_sb->s_flags & MS_RDONLY) ++ dbg_mnt("Auto resizing (ro) from %d LEBs to %d LEBs", ++ c->old_leb_cnt, c->leb_cnt); ++ else { ++ dbg_mnt("Auto resizing (sb) from %d LEBs to %d LEBs", ++ c->old_leb_cnt, c->leb_cnt); ++ sup->leb_cnt = cpu_to_le32(c->leb_cnt); ++ err = ubifs_write_sb_node(c, sup); ++ if (err) ++ goto out; ++ c->old_leb_cnt = c->leb_cnt; ++ } ++ } ++ ++ c->log_bytes = (long long)c->log_lebs * c->leb_size; ++ c->log_last = UBIFS_LOG_LNUM + c->log_lebs - 1; ++ c->lpt_first = UBIFS_LOG_LNUM + c->log_lebs; ++ c->lpt_last = c->lpt_first + c->lpt_lebs - 1; ++ c->orph_first = c->lpt_last + 1; ++ c->orph_last = c->orph_first + c->orph_lebs - 1; ++ c->main_lebs = c->leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS; ++ c->main_lebs -= c->log_lebs + c->lpt_lebs + c->orph_lebs; ++ c->main_first = c->leb_cnt - c->main_lebs; ++ ++ err = validate_sb(c, sup); ++out: ++ kfree(sup); ++ return err; ++} +diff -Nurd linux-2.6.24/fs/ubifs/scan.c ubifs-v2.6.24/fs/ubifs/scan.c +--- linux-2.6.24/fs/ubifs/scan.c 1970-01-01 02:00:00.000000000 +0200 ++++ ubifs-v2.6.24/fs/ubifs/scan.c 2009-04-07 17:14:47.000000000 +0200 +@@ -0,0 +1,362 @@ ++/* ++ * This file is part of UBIFS. ++ * ++ * Copyright (C) 2006-2008 Nokia Corporation ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published by ++ * the Free Software Foundation. 
++ * ++ * This program is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, write to the Free Software Foundation, Inc., 51 ++ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ * ++ * Authors: Adrian Hunter ++ * Artem Bityutskiy (Битюцкий Артём) ++ */ ++ ++/* ++ * This file implements the scan which is a general-purpose function for ++ * determining what nodes are in an eraseblock. The scan is used to replay the ++ * journal, to do garbage collection. for the TNC in-the-gaps method, and by ++ * debugging functions. ++ */ ++ ++#include "ubifs.h" ++ ++/** ++ * scan_padding_bytes - scan for padding bytes. ++ * @buf: buffer to scan ++ * @len: length of buffer ++ * ++ * This function returns the number of padding bytes on success and ++ * %SCANNED_GARBAGE on failure. ++ */ ++static int scan_padding_bytes(void *buf, int len) ++{ ++ int pad_len = 0, max_pad_len = min_t(int, UBIFS_PAD_NODE_SZ, len); ++ uint8_t *p = buf; ++ ++ dbg_scan("not a node"); ++ ++ while (pad_len < max_pad_len && *p++ == UBIFS_PADDING_BYTE) ++ pad_len += 1; ++ ++ if (!pad_len || (pad_len & 7)) ++ return SCANNED_GARBAGE; ++ ++ dbg_scan("%d padding bytes", pad_len); ++ ++ return pad_len; ++} ++ ++/** ++ * ubifs_scan_a_node - scan for a node or padding. ++ * @c: UBIFS file-system description object ++ * @buf: buffer to scan ++ * @len: length of buffer ++ * @lnum: logical eraseblock number ++ * @offs: offset within the logical eraseblock ++ * @quiet: print no messages ++ * ++ * This function returns a scanning code to indicate what was scanned. 
++ */ ++int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, ++ int offs, int quiet) ++{ ++ struct ubifs_ch *ch = buf; ++ uint32_t magic; ++ ++ magic = le32_to_cpu(ch->magic); ++ ++ if (magic == 0xFFFFFFFF) { ++ dbg_scan("hit empty space"); ++ return SCANNED_EMPTY_SPACE; ++ } ++ ++ if (magic != UBIFS_NODE_MAGIC) ++ return scan_padding_bytes(buf, len); ++ ++ if (len < UBIFS_CH_SZ) ++ return SCANNED_GARBAGE; ++ ++ dbg_scan("scanning %s", dbg_ntype(ch->node_type)); ++ ++ if (ubifs_check_node(c, buf, lnum, offs, quiet, 1)) ++ return SCANNED_A_CORRUPT_NODE; ++ ++ if (ch->node_type == UBIFS_PAD_NODE) { ++ struct ubifs_pad_node *pad = buf; ++ int pad_len = le32_to_cpu(pad->pad_len); ++ int node_len = le32_to_cpu(ch->len); ++ ++ /* Validate the padding node */ ++ if (pad_len < 0 || ++ offs + node_len + pad_len > c->leb_size) { ++ if (!quiet) { ++ ubifs_err("bad pad node at LEB %d:%d", ++ lnum, offs); ++ dbg_dump_node(c, pad); ++ } ++ return SCANNED_A_BAD_PAD_NODE; ++ } ++ ++ /* Make the node pads to 8-byte boundary */ ++ if ((node_len + pad_len) & 7) { ++ if (!quiet) { ++ dbg_err("bad padding length %d - %d", ++ offs, offs + node_len + pad_len); ++ } ++ return SCANNED_A_BAD_PAD_NODE; ++ } ++ ++ dbg_scan("%d bytes padded, offset now %d", ++ pad_len, ALIGN(offs + node_len + pad_len, 8)); ++ ++ return node_len + pad_len; ++ } ++ ++ return SCANNED_A_NODE; ++} ++ ++/** ++ * ubifs_start_scan - create LEB scanning information at start of scan. ++ * @c: UBIFS file-system description object ++ * @lnum: logical eraseblock number ++ * @offs: offset to start at (usually zero) ++ * @sbuf: scan buffer (must be c->leb_size) ++ * ++ * This function returns %0 on success and a negative error code on failure. 
++ */ ++struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum, ++ int offs, void *sbuf) ++{ ++ struct ubifs_scan_leb *sleb; ++ int err; ++ ++ dbg_scan("scan LEB %d:%d", lnum, offs); ++ ++ sleb = kzalloc(sizeof(struct ubifs_scan_leb), GFP_NOFS); ++ if (!sleb) ++ return ERR_PTR(-ENOMEM); ++ ++ sleb->lnum = lnum; ++ INIT_LIST_HEAD(&sleb->nodes); ++ sleb->buf = sbuf; ++ ++ err = ubi_read(c->ubi, lnum, sbuf + offs, offs, c->leb_size - offs); ++ if (err && err != -EBADMSG) { ++ ubifs_err("cannot read %d bytes from LEB %d:%d," ++ " error %d", c->leb_size - offs, lnum, offs, err); ++ kfree(sleb); ++ return ERR_PTR(err); ++ } ++ ++ if (err == -EBADMSG) ++ sleb->ecc = 1; ++ ++ return sleb; ++} ++ ++/** ++ * ubifs_end_scan - update LEB scanning information at end of scan. ++ * @c: UBIFS file-system description object ++ * @sleb: scanning information ++ * @lnum: logical eraseblock number ++ * @offs: offset to start at (usually zero) ++ * ++ * This function returns %0 on success and a negative error code on failure. ++ */ ++void ubifs_end_scan(const struct ubifs_info *c, struct ubifs_scan_leb *sleb, ++ int lnum, int offs) ++{ ++ lnum = lnum; ++ dbg_scan("stop scanning LEB %d at offset %d", lnum, offs); ++ ubifs_assert(offs % c->min_io_size == 0); ++ ++ sleb->endpt = ALIGN(offs, c->min_io_size); ++} ++ ++/** ++ * ubifs_add_snod - add a scanned node to LEB scanning information. ++ * @c: UBIFS file-system description object ++ * @sleb: scanning information ++ * @buf: buffer containing node ++ * @offs: offset of node on flash ++ * ++ * This function returns %0 on success and a negative error code on failure. 
++ */ ++int ubifs_add_snod(const struct ubifs_info *c, struct ubifs_scan_leb *sleb, ++ void *buf, int offs) ++{ ++ struct ubifs_ch *ch = buf; ++ struct ubifs_ino_node *ino = buf; ++ struct ubifs_scan_node *snod; ++ ++ snod = kzalloc(sizeof(struct ubifs_scan_node), GFP_NOFS); ++ if (!snod) ++ return -ENOMEM; ++ ++ snod->sqnum = le64_to_cpu(ch->sqnum); ++ snod->type = ch->node_type; ++ snod->offs = offs; ++ snod->len = le32_to_cpu(ch->len); ++ snod->node = buf; ++ ++ switch (ch->node_type) { ++ case UBIFS_INO_NODE: ++ case UBIFS_DENT_NODE: ++ case UBIFS_XENT_NODE: ++ case UBIFS_DATA_NODE: ++ case UBIFS_TRUN_NODE: ++ /* ++ * The key is in the same place in all keyed ++ * nodes. ++ */ ++ key_read(c, &ino->key, &snod->key); ++ break; ++ } ++ list_add_tail(&snod->list, &sleb->nodes); ++ sleb->nodes_cnt += 1; ++ return 0; ++} ++ ++/** ++ * ubifs_scanned_corruption - print information after UBIFS scanned corruption. ++ * @c: UBIFS file-system description object ++ * @lnum: LEB number of corruption ++ * @offs: offset of corruption ++ * @buf: buffer containing corruption ++ */ ++void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs, ++ void *buf) ++{ ++ int len; ++ ++ ubifs_err("corrupted data at LEB %d:%d", lnum, offs); ++ if (dbg_failure_mode) ++ return; ++ len = c->leb_size - offs; ++ if (len > 4096) ++ len = 4096; ++ dbg_err("first %d bytes from LEB %d:%d", len, lnum, offs); ++ print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 4, buf, len, 1); ++} ++ ++/** ++ * ubifs_scan - scan a logical eraseblock. ++ * @c: UBIFS file-system description object ++ * @lnum: logical eraseblock number ++ * @offs: offset to start at (usually zero) ++ * @sbuf: scan buffer (must be c->leb_size) ++ * ++ * This function scans LEB number @lnum and returns complete information about ++ * its contents. Returns an error code in case of failure. 
++ */ ++struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, ++ int offs, void *sbuf) ++{ ++ void *buf = sbuf + offs; ++ int err, len = c->leb_size - offs; ++ struct ubifs_scan_leb *sleb; ++ ++ sleb = ubifs_start_scan(c, lnum, offs, sbuf); ++ if (IS_ERR(sleb)) ++ return sleb; ++ ++ while (len >= 8) { ++ struct ubifs_ch *ch = buf; ++ int node_len, ret; ++ ++ dbg_scan("look at LEB %d:%d (%d bytes left)", ++ lnum, offs, len); ++ ++ cond_resched(); ++ ++ ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0); ++ ++ if (ret > 0) { ++ /* Padding bytes or a valid padding node */ ++ offs += ret; ++ buf += ret; ++ len -= ret; ++ continue; ++ } ++ ++ if (ret == SCANNED_EMPTY_SPACE) ++ /* Empty space is checked later */ ++ break; ++ ++ switch (ret) { ++ case SCANNED_GARBAGE: ++ dbg_err("garbage"); ++ goto corrupted; ++ case SCANNED_A_NODE: ++ break; ++ case SCANNED_A_CORRUPT_NODE: ++ case SCANNED_A_BAD_PAD_NODE: ++ dbg_err("bad node"); ++ goto corrupted; ++ default: ++ dbg_err("unknown"); ++ goto corrupted; ++ } ++ ++ err = ubifs_add_snod(c, sleb, buf, offs); ++ if (err) ++ goto error; ++ ++ node_len = ALIGN(le32_to_cpu(ch->len), 8); ++ offs += node_len; ++ buf += node_len; ++ len -= node_len; ++ } ++ ++ if (offs % c->min_io_size) ++ goto corrupted; ++ ++ ubifs_end_scan(c, sleb, lnum, offs); ++ ++ for (; len > 4; offs += 4, buf = buf + 4, len -= 4) ++ if (*(uint32_t *)buf != 0xffffffff) ++ break; ++ for (; len; offs++, buf++, len--) ++ if (*(uint8_t *)buf != 0xff) { ++ ubifs_err("corrupt empty space at LEB %d:%d", ++ lnum, offs); ++ goto corrupted; ++ } ++ ++ return sleb; ++ ++corrupted: ++ ubifs_scanned_corruption(c, lnum, offs, buf); ++ err = -EUCLEAN; ++error: ++ ubifs_err("LEB %d scanning failed", lnum); ++ ubifs_scan_destroy(sleb); ++ return ERR_PTR(err); ++} ++ ++/** ++ * ubifs_scan_destroy - destroy LEB scanning information. 
++ * @sleb: scanning information to free ++ */ ++void ubifs_scan_destroy(struct ubifs_scan_leb *sleb) ++{ ++ struct ubifs_scan_node *node; ++ struct list_head *head; ++ ++ head = &sleb->nodes; ++ while (!list_empty(head)) { ++ node = list_entry(head->next, struct ubifs_scan_node, list); ++ list_del(&node->list); ++ kfree(node); ++ } ++ kfree(sleb); ++} +diff -Nurd linux-2.6.24/fs/ubifs/shrinker.c ubifs-v2.6.24/fs/ubifs/shrinker.c +--- linux-2.6.24/fs/ubifs/shrinker.c 1970-01-01 02:00:00.000000000 +0200 ++++ ubifs-v2.6.24/fs/ubifs/shrinker.c 2009-04-07 17:14:47.000000000 +0200 +@@ -0,0 +1,320 @@ ++/* ++ * This file is part of UBIFS. ++ * ++ * Copyright (C) 2006-2008 Nokia Corporation. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published by ++ * the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, write to the Free Software Foundation, Inc., 51 ++ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ * ++ * Authors: Artem Bityutskiy (Битюцкий Артём) ++ * Adrian Hunter ++ */ ++ ++/* ++ * This file implements UBIFS shrinker which evicts clean znodes from the TNC ++ * tree when Linux VM needs more RAM. ++ * ++ * We do not implement any LRU lists to find oldest znodes to free because it ++ * would add additional overhead to the file system fast paths. So the shrinker ++ * just walks the TNC tree when searching for znodes to free. ++ * ++ * If the root of a TNC sub-tree is clean and old enough, then the children are ++ * also clean and old enough. 
So the shrinker walks the TNC in level order and ++ * dumps entire sub-trees. ++ * ++ * The age of znodes is just the time-stamp when they were last looked at. ++ * The current shrinker first tries to evict old znodes, then young ones. ++ * ++ * Since the shrinker is global, it has to protect against races with FS ++ * un-mounts, which is done by the 'ubifs_infos_lock' and 'c->umount_mutex'. ++ */ ++ ++#include "ubifs.h" ++ ++/* List of all UBIFS file-system instances */ ++LIST_HEAD(ubifs_infos); ++ ++/* ++ * We number each shrinker run and record the number on the ubifs_info structure ++ * so that we can easily work out which ubifs_info structures have already been ++ * done by the current run. ++ */ ++static unsigned int shrinker_run_no; ++ ++/* Protects 'ubifs_infos' list */ ++DEFINE_SPINLOCK(ubifs_infos_lock); ++ ++/* Global clean znode counter (for all mounted UBIFS instances) */ ++atomic_long_t ubifs_clean_zn_cnt; ++ ++/** ++ * shrink_tnc - shrink TNC tree. ++ * @c: UBIFS file-system description object ++ * @nr: number of znodes to free ++ * @age: the age of znodes to free ++ * @contention: if any contention, this is set to %1 ++ * ++ * This function traverses TNC tree and frees clean znodes. It does not free ++ * clean znodes which younger then @age. Returns number of freed znodes. ++ */ ++static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention) ++{ ++ int total_freed = 0; ++ struct ubifs_znode *znode, *zprev; ++ int time = get_seconds(); ++ ++ ubifs_assert(mutex_is_locked(&c->umount_mutex)); ++ ubifs_assert(mutex_is_locked(&c->tnc_mutex)); ++ ++ if (!c->zroot.znode || atomic_long_read(&c->clean_zn_cnt) == 0) ++ return 0; ++ ++ /* ++ * Traverse the TNC tree in levelorder manner, so that it is possible ++ * to destroy large sub-trees. Indeed, if a znode is old, then all its ++ * children are older or of the same age. 
++ * ++ * Note, we are holding 'c->tnc_mutex', so we do not have to lock the ++ * 'c->space_lock' when _reading_ 'c->clean_zn_cnt', because it is ++ * changed only when the 'c->tnc_mutex' is held. ++ */ ++ zprev = NULL; ++ znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL); ++ while (znode && total_freed < nr && ++ atomic_long_read(&c->clean_zn_cnt) > 0) { ++ int freed; ++ ++ /* ++ * If the znode is clean, but it is in the 'c->cnext' list, this ++ * means that this znode has just been written to flash as a ++ * part of commit and was marked clean. They will be removed ++ * from the list at end commit. We cannot change the list, ++ * because it is not protected by any mutex (design decision to ++ * make commit really independent and parallel to main I/O). So ++ * we just skip these znodes. ++ * ++ * Note, the 'clean_zn_cnt' counters are not updated until ++ * after the commit, so the UBIFS shrinker does not report ++ * the znodes which are in the 'c->cnext' list as freeable. ++ * ++ * Also note, if the root of a sub-tree is not in 'c->cnext', ++ * then the whole sub-tree is not in 'c->cnext' as well, so it ++ * is safe to dump whole sub-tree. ++ */ ++ ++ if (znode->cnext) { ++ /* ++ * Very soon these znodes will be removed from the list ++ * and become freeable. ++ */ ++ *contention = 1; ++ } else if (!ubifs_zn_dirty(znode) && ++ abs(time - znode->time) >= age) { ++ if (znode->parent) ++ znode->parent->zbranch[znode->iip].znode = NULL; ++ else ++ c->zroot.znode = NULL; ++ ++ freed = ubifs_destroy_tnc_subtree(znode); ++ atomic_long_sub(freed, &ubifs_clean_zn_cnt); ++ atomic_long_sub(freed, &c->clean_zn_cnt); ++ ubifs_assert(atomic_long_read(&c->clean_zn_cnt) >= 0); ++ total_freed += freed; ++ znode = zprev; ++ } ++ ++ if (unlikely(!c->zroot.znode)) ++ break; ++ ++ zprev = znode; ++ znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode); ++ cond_resched(); ++ } ++ ++ return total_freed; ++} ++ ++/** ++ * shrink_tnc_trees - shrink UBIFS TNC trees. 
++ * @nr: number of znodes to free ++ * @age: the age of znodes to free ++ * @contention: if any contention, this is set to %1 ++ * ++ * This function walks the list of mounted UBIFS file-systems and frees clean ++ * znodes which are older then @age, until at least @nr znodes are freed. ++ * Returns the number of freed znodes. ++ */ ++static int shrink_tnc_trees(int nr, int age, int *contention) ++{ ++ struct ubifs_info *c; ++ struct list_head *p; ++ unsigned int run_no; ++ int freed = 0; ++ ++ spin_lock(&ubifs_infos_lock); ++ do { ++ run_no = ++shrinker_run_no; ++ } while (run_no == 0); ++ /* Iterate over all mounted UBIFS file-systems and try to shrink them */ ++ p = ubifs_infos.next; ++ while (p != &ubifs_infos) { ++ c = list_entry(p, struct ubifs_info, infos_list); ++ /* ++ * We move the ones we do to the end of the list, so we stop ++ * when we see one we have already done. ++ */ ++ if (c->shrinker_run_no == run_no) ++ break; ++ if (!mutex_trylock(&c->umount_mutex)) { ++ /* Some un-mount is in progress, try next FS */ ++ *contention = 1; ++ p = p->next; ++ continue; ++ } ++ /* ++ * We're holding 'c->umount_mutex', so the file-system won't go ++ * away. ++ */ ++ if (!mutex_trylock(&c->tnc_mutex)) { ++ mutex_unlock(&c->umount_mutex); ++ *contention = 1; ++ p = p->next; ++ continue; ++ } ++ spin_unlock(&ubifs_infos_lock); ++ /* ++ * OK, now we have TNC locked, the file-system cannot go away - ++ * it is safe to reap the cache. ++ */ ++ c->shrinker_run_no = run_no; ++ freed += shrink_tnc(c, nr, age, contention); ++ mutex_unlock(&c->tnc_mutex); ++ spin_lock(&ubifs_infos_lock); ++ /* Get the next list element before we move this one */ ++ p = p->next; ++ /* ++ * Move this one to the end of the list to provide some ++ * fairness. 
++ */ ++ list_move_tail(&c->infos_list, &ubifs_infos); ++ mutex_unlock(&c->umount_mutex); ++ if (freed >= nr) ++ break; ++ } ++ spin_unlock(&ubifs_infos_lock); ++ return freed; ++} ++ ++/** ++ * kick_a_thread - kick a background thread to start commit. ++ * ++ * This function kicks a background thread to start background commit. Returns ++ * %-1 if a thread was kicked or there is another reason to assume the memory ++ * will soon be freed or become freeable. If there are no dirty znodes, returns ++ * %0. ++ */ ++static int kick_a_thread(void) ++{ ++ int i; ++ struct ubifs_info *c; ++ ++ /* ++ * Iterate over all mounted UBIFS file-systems and find out if there is ++ * already an ongoing commit operation there. If no, then iterate for ++ * the second time and initiate background commit. ++ */ ++ spin_lock(&ubifs_infos_lock); ++ for (i = 0; i < 2; i++) { ++ list_for_each_entry(c, &ubifs_infos, infos_list) { ++ long dirty_zn_cnt; ++ ++ if (!mutex_trylock(&c->umount_mutex)) { ++ /* ++ * Some un-mount is in progress, it will ++ * certainly free memory, so just return. 
++ */ ++ spin_unlock(&ubifs_infos_lock); ++ return -1; ++ } ++ ++ dirty_zn_cnt = atomic_long_read(&c->dirty_zn_cnt); ++ ++ if (!dirty_zn_cnt || c->cmt_state == COMMIT_BROKEN || ++ c->ro_media) { ++ mutex_unlock(&c->umount_mutex); ++ continue; ++ } ++ ++ if (c->cmt_state != COMMIT_RESTING) { ++ spin_unlock(&ubifs_infos_lock); ++ mutex_unlock(&c->umount_mutex); ++ return -1; ++ } ++ ++ if (i == 1) { ++ list_move_tail(&c->infos_list, &ubifs_infos); ++ spin_unlock(&ubifs_infos_lock); ++ ++ ubifs_request_bg_commit(c); ++ mutex_unlock(&c->umount_mutex); ++ return -1; ++ } ++ mutex_unlock(&c->umount_mutex); ++ } ++ } ++ spin_unlock(&ubifs_infos_lock); ++ ++ return 0; ++} ++ ++int ubifs_shrinker(int nr, gfp_t gfp_mask) ++{ ++ int freed, contention = 0; ++ long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); ++ ++ if (nr == 0) ++ return clean_zn_cnt; ++ ++ if (!clean_zn_cnt) { ++ /* ++ * No clean znodes, nothing to reap. All we can do in this case ++ * is to kick background threads to start commit, which will ++ * probably make clean znodes which, in turn, will be freeable. ++ * And we return -1 which means will make VM call us again ++ * later. 
++ */ ++ dbg_tnc("no clean znodes, kick a thread"); ++ return kick_a_thread(); ++ } ++ ++ freed = shrink_tnc_trees(nr, OLD_ZNODE_AGE, &contention); ++ if (freed >= nr) ++ goto out; ++ ++ dbg_tnc("not enough old znodes, try to free young ones"); ++ freed += shrink_tnc_trees(nr - freed, YOUNG_ZNODE_AGE, &contention); ++ if (freed >= nr) ++ goto out; ++ ++ dbg_tnc("not enough young znodes, free all"); ++ freed += shrink_tnc_trees(nr - freed, 0, &contention); ++ ++ if (!freed && contention) { ++ dbg_tnc("freed nothing, but contention"); ++ return -1; ++ } ++ ++out: ++ dbg_tnc("%d znodes were freed, requested %d", freed, nr); ++ return freed; ++} +diff -Nurd linux-2.6.24/fs/ubifs/super.c ubifs-v2.6.24/fs/ubifs/super.c +--- linux-2.6.24/fs/ubifs/super.c 1970-01-01 02:00:00.000000000 +0200 ++++ ubifs-v2.6.24/fs/ubifs/super.c 2009-04-07 17:14:47.000000000 +0200 +@@ -0,0 +1,2209 @@ ++/* ++ * This file is part of UBIFS. ++ * ++ * Copyright (C) 2006-2008 Nokia Corporation. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published by ++ * the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, write to the Free Software Foundation, Inc., 51 ++ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ * ++ * Authors: Artem Bityutskiy (Битюцкий Артём) ++ * Adrian Hunter ++ */ ++ ++/* ++ * This file implements UBIFS initialization and VFS superblock operations. Some ++ * initialization stuff which is rather large and complex is placed at ++ * corresponding subsystems, but most of it is here. 
++ */ ++ ++#include <linux/init.h> ++#include <linux/slab.h> ++#include <linux/module.h> ++#include <linux/ctype.h> ++#include <linux/kthread.h> ++#include <linux/parser.h> ++#include <linux/seq_file.h> ++#include <linux/mount.h> ++#include <linux/writeback.h> ++#include "ubifs.h" ++ ++/* ++ * Maximum amount of memory we may 'kmalloc()' without worrying that we are ++ * allocating too much. ++ */ ++#define UBIFS_KMALLOC_OK (128*1024) ++ ++/* Slab cache for UBIFS inodes */ ++struct kmem_cache *ubifs_inode_slab; ++ ++#ifndef UBIFS_COMPAT_NO_SHRINKER ++/* UBIFS TNC shrinker description */ ++static struct shrinker ubifs_shrinker_info = { ++ .shrink = ubifs_shrinker, ++ .seeks = DEFAULT_SEEKS, ++}; ++#endif ++ ++/** ++ * validate_inode - validate inode. ++ * @c: UBIFS file-system description object ++ * @inode: the inode to validate ++ * ++ * This is a helper function for 'ubifs_iget()' which validates various fields ++ * of a newly built inode to make sure they contain sane values and prevent ++ * possible vulnerabilities. Returns zero if the inode is all right and ++ * a non-zero error code if not. 
++ */ ++static int validate_inode(struct ubifs_info *c, const struct inode *inode) ++{ ++ int err; ++ const struct ubifs_inode *ui = ubifs_inode(inode); ++ ++ if (inode->i_size > c->max_inode_sz) { ++ ubifs_err("inode is too large (%lld)", ++ (long long)inode->i_size); ++ return 1; ++ } ++ ++ if (ui->compr_type < 0 || ui->compr_type >= UBIFS_COMPR_TYPES_CNT) { ++ ubifs_err("unknown compression type %d", ui->compr_type); ++ return 2; ++ } ++ ++ if (ui->xattr_names + ui->xattr_cnt > XATTR_LIST_MAX) ++ return 3; ++ ++ if (ui->data_len < 0 || ui->data_len > UBIFS_MAX_INO_DATA) ++ return 4; ++ ++ if (ui->xattr && (inode->i_mode & S_IFMT) != S_IFREG) ++ return 5; ++ ++ if (!ubifs_compr_present(ui->compr_type)) { ++ ubifs_warn("inode %lu uses '%s' compression, but it was not " ++ "compiled in", inode->i_ino, ++ ubifs_compr_name(ui->compr_type)); ++ } ++ ++ err = dbg_check_dir_size(c, inode); ++ return err; ++} ++ ++struct inode *ubifs_iget(struct super_block *sb, unsigned long inum) ++{ ++ struct inode *inode; ++ ++ inode = iget(sb, inum); ++ if (!inode) { ++ /* iget() returned NULL: there is no inode to mark bad */ ++ return ERR_PTR(-EINVAL); ++ } ++ ++ return inode; ++} ++ ++void ubifs_read_inode(struct inode *inode) ++{ ++ int err; ++ union ubifs_key key; ++ struct ubifs_ino_node *ino; ++ struct ubifs_info *c = inode->i_sb->s_fs_info; ++ struct ubifs_inode *ui = ubifs_inode(inode); ++ ++ dbg_gen("inode %lu", inode->i_ino); ++ ubifs_assert(inode->i_state & I_LOCK); ++ ++ ino = kmalloc(UBIFS_MAX_INO_NODE_SZ, GFP_NOFS); ++ if (!ino) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ ino_key_init(c, &key, inode->i_ino); ++ ++ err = ubifs_tnc_lookup(c, &key, ino); ++ if (err) ++ goto out_ino; ++ ++ inode->i_flags |= (S_NOCMTIME | S_NOATIME); ++ inode->i_nlink = le32_to_cpu(ino->nlink); ++ inode->i_uid = le32_to_cpu(ino->uid); ++ inode->i_gid = le32_to_cpu(ino->gid); ++ inode->i_atime.tv_sec = (int64_t)le64_to_cpu(ino->atime_sec); ++ inode->i_atime.tv_nsec = le32_to_cpu(ino->atime_nsec); ++ inode->i_mtime.tv_sec = 
(int64_t)le64_to_cpu(ino->mtime_sec); ++ inode->i_mtime.tv_nsec = le32_to_cpu(ino->mtime_nsec); ++ inode->i_ctime.tv_sec = (int64_t)le64_to_cpu(ino->ctime_sec); ++ inode->i_ctime.tv_nsec = le32_to_cpu(ino->ctime_nsec); ++ inode->i_mode = le32_to_cpu(ino->mode); ++ inode->i_size = le64_to_cpu(ino->size); ++ ++ ui->data_len = le32_to_cpu(ino->data_len); ++ ui->flags = le32_to_cpu(ino->flags); ++ ui->compr_type = le16_to_cpu(ino->compr_type); ++ ui->creat_sqnum = le64_to_cpu(ino->creat_sqnum); ++ ui->xattr_cnt = le32_to_cpu(ino->xattr_cnt); ++ ui->xattr_size = le32_to_cpu(ino->xattr_size); ++ ui->xattr_names = le32_to_cpu(ino->xattr_names); ++ ui->synced_i_size = ui->ui_size = inode->i_size; ++ ++ ui->xattr = (ui->flags & UBIFS_XATTR_FL) ? 1 : 0; ++ ++ err = validate_inode(c, inode); ++ if (err) ++ goto out_invalid; ++ ++ /* Disable read-ahead */ ++ inode->i_mapping->backing_dev_info = &c->bdi; ++ ++ switch (inode->i_mode & S_IFMT) { ++ case S_IFREG: ++ inode->i_mapping->a_ops = &ubifs_file_address_operations; ++ inode->i_op = &ubifs_file_inode_operations; ++ inode->i_fop = &ubifs_file_operations; ++ if (ui->xattr) { ++ ui->data = kmalloc(ui->data_len + 1, GFP_NOFS); ++ if (!ui->data) { ++ err = -ENOMEM; ++ goto out_ino; ++ } ++ memcpy(ui->data, ino->data, ui->data_len); ++ ((char *)ui->data)[ui->data_len] = '\0'; ++ } else if (ui->data_len != 0) { ++ err = 10; ++ goto out_invalid; ++ } ++ break; ++ case S_IFDIR: ++ inode->i_op = &ubifs_dir_inode_operations; ++ inode->i_fop = &ubifs_dir_operations; ++ if (ui->data_len != 0) { ++ err = 11; ++ goto out_invalid; ++ } ++ break; ++ case S_IFLNK: ++ inode->i_op = &ubifs_symlink_inode_operations; ++ if (ui->data_len <= 0 || ui->data_len > UBIFS_MAX_INO_DATA) { ++ err = 12; ++ goto out_invalid; ++ } ++ ui->data = kmalloc(ui->data_len + 1, GFP_NOFS); ++ if (!ui->data) { ++ err = -ENOMEM; ++ goto out_ino; ++ } ++ memcpy(ui->data, ino->data, ui->data_len); ++ ((char *)ui->data)[ui->data_len] = '\0'; ++ break; ++ case S_IFBLK: ++ 
case S_IFCHR: ++ { ++ dev_t rdev; ++ union ubifs_dev_desc *dev; ++ ++ ui->data = kmalloc(sizeof(union ubifs_dev_desc), GFP_NOFS); ++ if (!ui->data) { ++ err = -ENOMEM; ++ goto out_ino; ++ } ++ ++ dev = (union ubifs_dev_desc *)ino->data; ++ if (ui->data_len == sizeof(dev->new)) ++ rdev = new_decode_dev(le32_to_cpu(dev->new)); ++ else if (ui->data_len == sizeof(dev->huge)) ++ rdev = huge_decode_dev(le64_to_cpu(dev->huge)); ++ else { ++ err = 13; ++ goto out_invalid; ++ } ++ memcpy(ui->data, ino->data, ui->data_len); ++ inode->i_op = &ubifs_file_inode_operations; ++ init_special_inode(inode, inode->i_mode, rdev); ++ break; ++ } ++ case S_IFSOCK: ++ case S_IFIFO: ++ inode->i_op = &ubifs_file_inode_operations; ++ init_special_inode(inode, inode->i_mode, 0); ++ if (ui->data_len != 0) { ++ err = 14; ++ goto out_invalid; ++ } ++ break; ++ default: ++ err = 15; ++ goto out_invalid; ++ } ++ ++ ubifs_set_inode_flags(inode); ++ kfree(ino); ++ return; ++ ++out_invalid: ++ ubifs_err("inode %lu validation failed, error %d", inode->i_ino, err); ++ dbg_dump_inode(c, inode); ++ dbg_dump_node(c, ino); ++ err = -EINVAL; ++out_ino: ++ kfree(ino); ++out: ++ ubifs_err("failed to read inode %lu, error %d", inode->i_ino, err); ++ make_bad_inode(inode); ++ return; ++} ++ ++static struct inode *ubifs_alloc_inode(struct super_block *sb) ++{ ++ struct ubifs_inode *ui; ++ ++ ui = kmem_cache_alloc(ubifs_inode_slab, GFP_NOFS); ++ if (!ui) ++ return NULL; ++ ++ memset((void *)ui + sizeof(struct inode), 0, ++ sizeof(struct ubifs_inode) - sizeof(struct inode)); ++ mutex_init(&ui->ui_mutex); ++ spin_lock_init(&ui->ui_lock); ++ return &ui->vfs_inode; ++}; ++ ++static void ubifs_destroy_inode(struct inode *inode) ++{ ++ struct ubifs_inode *ui = ubifs_inode(inode); ++ ++ kfree(ui->data); ++ kmem_cache_free(ubifs_inode_slab, inode); ++} ++ ++/* ++ * Note, Linux write-back code calls this without 'i_mutex'. 
++ */ ++static int ubifs_write_inode(struct inode *inode, int wait) ++{ ++ int err = 0; ++ struct ubifs_info *c = inode->i_sb->s_fs_info; ++ struct ubifs_inode *ui = ubifs_inode(inode); ++ ++ ubifs_assert(!ui->xattr); ++ if (is_bad_inode(inode)) ++ return 0; ++ ++ mutex_lock(&ui->ui_mutex); ++ /* ++ * Due to races between write-back forced by budgeting ++ * (see 'sync_some_inodes()') and pdflush write-back, the inode may ++ * have already been synchronized, do not do this again. This might ++ * also happen if it was synchronized in a VFS operation, e.g. ++ * 'ubifs_link()'. ++ */ ++ if (!ui->dirty) { ++ mutex_unlock(&ui->ui_mutex); ++ return 0; ++ } ++ ++ /* ++ * As an optimization, do not write orphan inodes to the media just ++ * because this is not needed. ++ */ ++ dbg_gen("inode %lu, mode %#x, nlink %u", ++ inode->i_ino, (int)inode->i_mode, inode->i_nlink); ++ if (inode->i_nlink) { ++ err = ubifs_jnl_write_inode(c, inode); ++ if (err) ++ ubifs_err("can't write inode %lu, error %d", ++ inode->i_ino, err); ++ } ++ ++ ui->dirty = 0; ++ mutex_unlock(&ui->ui_mutex); ++ ubifs_release_dirty_inode_budget(c, ui); ++ return err; ++} ++ ++static void ubifs_delete_inode(struct inode *inode) ++{ ++ int err; ++ struct ubifs_info *c = inode->i_sb->s_fs_info; ++ struct ubifs_inode *ui = ubifs_inode(inode); ++ ++ if (ui->xattr) ++ /* ++ * Extended attribute inode deletions are fully handled in ++ * 'ubifs_removexattr()'. These inodes are special and have ++ * limited usage, so there is nothing to do here. ++ */ ++ goto out; ++ ++ dbg_gen("inode %lu, mode %#x", inode->i_ino, (int)inode->i_mode); ++ ubifs_assert(!atomic_read(&inode->i_count)); ++ ubifs_assert(inode->i_nlink == 0); ++ ++ truncate_inode_pages(&inode->i_data, 0); ++ if (is_bad_inode(inode)) ++ goto out; ++ ++ ui->ui_size = inode->i_size = 0; ++ err = ubifs_jnl_delete_inode(c, inode); ++ if (err) ++ /* ++ * Worst case we have a lost orphan inode wasting space, so a ++ * simple error message is OK here. 
++ */ ++ ubifs_err("can't delete inode %lu, error %d", ++ inode->i_ino, err); ++ ++out: ++ if (ui->dirty) ++ ubifs_release_dirty_inode_budget(c, ui); ++ clear_inode(inode); ++} ++ ++static void ubifs_dirty_inode(struct inode *inode) ++{ ++ struct ubifs_inode *ui = ubifs_inode(inode); ++ ++ ubifs_assert(mutex_is_locked(&ui->ui_mutex)); ++ if (!ui->dirty) { ++ ui->dirty = 1; ++ dbg_gen("inode %lu", inode->i_ino); ++ } ++} ++ ++static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf) ++{ ++ struct ubifs_info *c = dentry->d_sb->s_fs_info; ++ unsigned long long free; ++ __le32 *uuid = (__le32 *)c->uuid; ++ ++ free = ubifs_get_free_space(c); ++ dbg_gen("free space %lld bytes (%lld blocks)", ++ free, free >> UBIFS_BLOCK_SHIFT); ++ ++ buf->f_type = UBIFS_SUPER_MAGIC; ++ buf->f_bsize = UBIFS_BLOCK_SIZE; ++ buf->f_blocks = c->block_cnt; ++ buf->f_bfree = free >> UBIFS_BLOCK_SHIFT; ++ if (free > c->report_rp_size) ++ buf->f_bavail = (free - c->report_rp_size) >> UBIFS_BLOCK_SHIFT; ++ else ++ buf->f_bavail = 0; ++ buf->f_files = 0; ++ buf->f_ffree = 0; ++ buf->f_namelen = UBIFS_MAX_NLEN; ++ buf->f_fsid.val[0] = le32_to_cpu(uuid[0]) ^ le32_to_cpu(uuid[2]); ++ buf->f_fsid.val[1] = le32_to_cpu(uuid[1]) ^ le32_to_cpu(uuid[3]); ++ ubifs_assert(buf->f_bfree <= c->block_cnt); ++ return 0; ++} ++ ++static int ubifs_show_options(struct seq_file *s, struct vfsmount *mnt) ++{ ++ struct ubifs_info *c = mnt->mnt_sb->s_fs_info; ++ ++ if (c->mount_opts.unmount_mode == 2) ++ seq_printf(s, ",fast_unmount"); ++ else if (c->mount_opts.unmount_mode == 1) ++ seq_printf(s, ",norm_unmount"); ++ ++ if (c->mount_opts.bulk_read == 2) ++ seq_printf(s, ",bulk_read"); ++ else if (c->mount_opts.bulk_read == 1) ++ seq_printf(s, ",no_bulk_read"); ++ ++ if (c->mount_opts.chk_data_crc == 2) ++ seq_printf(s, ",chk_data_crc"); ++ else if (c->mount_opts.chk_data_crc == 1) ++ seq_printf(s, ",no_chk_data_crc"); ++ ++ if (c->mount_opts.override_compr) { ++ seq_printf(s, ",compr=%s", ++ 
ubifs_compr_name(c->mount_opts.compr_type)); ++ } ++ ++ return 0; ++} ++ ++static int ubifs_sync_fs(struct super_block *sb, int wait) ++{ ++ int i, err; ++ struct ubifs_info *c = sb->s_fs_info; ++ struct writeback_control wbc = { ++ .sync_mode = WB_SYNC_ALL, ++ .range_start = 0, ++ .range_end = LLONG_MAX, ++ .nr_to_write = LONG_MAX, ++ }; ++ ++ /* ++ * Zero @wait is just an advisory thing to help the file system shove ++ * lots of data into the queues, and there will be the second ++ * '->sync_fs()' call, with non-zero @wait. ++ */ ++ if (!wait) ++ return 0; ++ ++ if (sb->s_flags & MS_RDONLY) ++ return 0; ++ ++ /* ++ * VFS calls '->sync_fs()' before synchronizing all dirty inodes and ++ * pages, so synchronize them first, then commit the journal. Strictly ++ * speaking, it is not necessary to commit the journal here, ++ * synchronizing write-buffers would be enough. But committing makes ++ * UBIFS free space predictions much more accurate, so we want to let ++ * the user be able to get more accurate results of 'statfs()' after ++ * they synchronize the file system. ++ */ ++ generic_sync_sb_inodes(sb, &wbc); ++ ++ /* ++ * Synchronize write buffers, because 'ubifs_run_commit()' does not ++ * do this if it waits for an already running commit. ++ */ ++ for (i = 0; i < c->jhead_cnt; i++) { ++ err = ubifs_wbuf_sync(&c->jheads[i].wbuf); ++ if (err) ++ return err; ++ } ++ ++ err = ubifs_run_commit(c); ++ if (err) ++ return err; ++ ++ return ubi_sync(c->vi.ubi_num); ++} ++ ++/** ++ * init_constants_early - initialize UBIFS constants. ++ * @c: UBIFS file-system description object ++ * ++ * This function initialize UBIFS constants which do not need the superblock to ++ * be read. It also checks that the UBI volume satisfies basic UBIFS ++ * requirements. Returns zero in case of success and a negative error code in ++ * case of failure. 
++ */ ++static int init_constants_early(struct ubifs_info *c) ++{ ++ if (c->vi.corrupted) { ++ ubifs_warn("UBI volume is corrupted - read-only mode"); ++ c->ro_media = 1; ++ } ++ ++ if (c->di.ro_mode) { ++ ubifs_msg("read-only UBI device"); ++ c->ro_media = 1; ++ } ++ ++ if (c->vi.vol_type == UBI_STATIC_VOLUME) { ++ ubifs_msg("static UBI volume - read-only mode"); ++ c->ro_media = 1; ++ } ++ ++ c->leb_cnt = c->vi.size; ++ c->leb_size = c->vi.usable_leb_size; ++ c->half_leb_size = c->leb_size / 2; ++ c->min_io_size = c->di.min_io_size; ++ c->min_io_shift = fls(c->min_io_size) - 1; ++ ++ if (c->leb_size < UBIFS_MIN_LEB_SZ) { ++ ubifs_err("too small LEBs (%d bytes), min. is %d bytes", ++ c->leb_size, UBIFS_MIN_LEB_SZ); ++ return -EINVAL; ++ } ++ ++ if (c->leb_cnt < UBIFS_MIN_LEB_CNT) { ++ ubifs_err("too few LEBs (%d), min. is %d", ++ c->leb_cnt, UBIFS_MIN_LEB_CNT); ++ return -EINVAL; ++ } ++ ++ if (!is_power_of_2(c->min_io_size)) { ++ ubifs_err("bad min. I/O size %d", c->min_io_size); ++ return -EINVAL; ++ } ++ ++ /* ++ * UBIFS aligns all node to 8-byte boundary, so to make function in ++ * io.c simpler, assume minimum I/O unit size to be 8 bytes if it is ++ * less than 8. ++ */ ++ if (c->min_io_size < 8) { ++ c->min_io_size = 8; ++ c->min_io_shift = 3; ++ } ++ ++ c->ref_node_alsz = ALIGN(UBIFS_REF_NODE_SZ, c->min_io_size); ++ c->mst_node_alsz = ALIGN(UBIFS_MST_NODE_SZ, c->min_io_size); ++ ++ /* ++ * Initialize node length ranges which are mostly needed for node ++ * length validation. 
++ */ ++ c->ranges[UBIFS_PAD_NODE].len = UBIFS_PAD_NODE_SZ; ++ c->ranges[UBIFS_SB_NODE].len = UBIFS_SB_NODE_SZ; ++ c->ranges[UBIFS_MST_NODE].len = UBIFS_MST_NODE_SZ; ++ c->ranges[UBIFS_REF_NODE].len = UBIFS_REF_NODE_SZ; ++ c->ranges[UBIFS_TRUN_NODE].len = UBIFS_TRUN_NODE_SZ; ++ c->ranges[UBIFS_CS_NODE].len = UBIFS_CS_NODE_SZ; ++ ++ c->ranges[UBIFS_INO_NODE].min_len = UBIFS_INO_NODE_SZ; ++ c->ranges[UBIFS_INO_NODE].max_len = UBIFS_MAX_INO_NODE_SZ; ++ c->ranges[UBIFS_ORPH_NODE].min_len = ++ UBIFS_ORPH_NODE_SZ + sizeof(__le64); ++ c->ranges[UBIFS_ORPH_NODE].max_len = c->leb_size; ++ c->ranges[UBIFS_DENT_NODE].min_len = UBIFS_DENT_NODE_SZ; ++ c->ranges[UBIFS_DENT_NODE].max_len = UBIFS_MAX_DENT_NODE_SZ; ++ c->ranges[UBIFS_XENT_NODE].min_len = UBIFS_XENT_NODE_SZ; ++ c->ranges[UBIFS_XENT_NODE].max_len = UBIFS_MAX_XENT_NODE_SZ; ++ c->ranges[UBIFS_DATA_NODE].min_len = UBIFS_DATA_NODE_SZ; ++ c->ranges[UBIFS_DATA_NODE].max_len = UBIFS_MAX_DATA_NODE_SZ; ++ /* ++ * Minimum indexing node size is amended later when superblock is ++ * read and the key length is known. ++ */ ++ c->ranges[UBIFS_IDX_NODE].min_len = UBIFS_IDX_NODE_SZ + UBIFS_BRANCH_SZ; ++ /* ++ * Maximum indexing node size is amended later when superblock is ++ * read and the fanout is known. ++ */ ++ c->ranges[UBIFS_IDX_NODE].max_len = INT_MAX; ++ ++ /* ++ * Initialize dead and dark LEB space watermarks. See gc.c for comments ++ * about these values. ++ */ ++ c->dead_wm = ALIGN(MIN_WRITE_SZ, c->min_io_size); ++ c->dark_wm = ALIGN(UBIFS_MAX_NODE_SZ, c->min_io_size); ++ ++ /* ++ * Calculate how many bytes would be wasted at the end of LEB if it was ++ * fully filled with data nodes of maximum size. This is used in ++ * calculations when reporting free space. 
++ */ ++ c->leb_overhead = c->leb_size % UBIFS_MAX_DATA_NODE_SZ; ++ ++ /* Buffer size for bulk-reads */ ++ c->max_bu_buf_len = UBIFS_MAX_BULK_READ * UBIFS_MAX_DATA_NODE_SZ; ++ if (c->max_bu_buf_len > c->leb_size) ++ c->max_bu_buf_len = c->leb_size; ++ return 0; ++} ++ ++/** ++ * bud_wbuf_callback - bud LEB write-buffer synchronization call-back. ++ * @c: UBIFS file-system description object ++ * @lnum: LEB the write-buffer was synchronized to ++ * @free: how many free bytes left in this LEB ++ * @pad: how many bytes were padded ++ * ++ * This is a callback function which is called by the I/O unit when the ++ * write-buffer is synchronized. We need this to correctly maintain space ++ * accounting in bud logical eraseblocks. This function returns zero in case of ++ * success and a negative error code in case of failure. ++ * ++ * This function actually belongs to the journal, but we keep it here because ++ * we want to keep it static. ++ */ ++static int bud_wbuf_callback(struct ubifs_info *c, int lnum, int free, int pad) ++{ ++ return ubifs_update_one_lp(c, lnum, free, pad, 0, 0); ++} ++ ++/* ++ * init_constants_sb - initialize UBIFS constants. ++ * @c: UBIFS file-system description object ++ * ++ * This is a helper function which initializes various UBIFS constants after ++ * the superblock has been read. It also checks various UBIFS parameters and ++ * makes sure they are all right. Returns zero in case of success and a ++ * negative error code in case of failure. 
++ */ ++static int init_constants_sb(struct ubifs_info *c) ++{ ++ int tmp, err; ++ long long tmp64; ++ ++ c->main_bytes = (long long)c->main_lebs * c->leb_size; ++ c->max_znode_sz = sizeof(struct ubifs_znode) + ++ c->fanout * sizeof(struct ubifs_zbranch); ++ ++ tmp = ubifs_idx_node_sz(c, 1); ++ c->ranges[UBIFS_IDX_NODE].min_len = tmp; ++ c->min_idx_node_sz = ALIGN(tmp, 8); ++ ++ tmp = ubifs_idx_node_sz(c, c->fanout); ++ c->ranges[UBIFS_IDX_NODE].max_len = tmp; ++ c->max_idx_node_sz = ALIGN(tmp, 8); ++ ++ /* Make sure LEB size is large enough to fit full commit */ ++ tmp = UBIFS_CS_NODE_SZ + UBIFS_REF_NODE_SZ * c->jhead_cnt; ++ tmp = ALIGN(tmp, c->min_io_size); ++ if (tmp > c->leb_size) { ++ dbg_err("too small LEB size %d, at least %d needed", ++ c->leb_size, tmp); ++ return -EINVAL; ++ } ++ ++ /* ++ * Make sure that the log is large enough to fit reference nodes for ++ * all buds plus one reserved LEB. ++ */ ++ tmp64 = c->max_bud_bytes + c->leb_size - 1; ++ c->max_bud_cnt = div_u64(tmp64, c->leb_size); ++ tmp = (c->ref_node_alsz * c->max_bud_cnt + c->leb_size - 1); ++ tmp /= c->leb_size; ++ tmp += 1; ++ if (c->log_lebs < tmp) { ++ dbg_err("too small log %d LEBs, required min. %d LEBs", ++ c->log_lebs, tmp); ++ return -EINVAL; ++ } ++ ++ /* ++ * When budgeting we assume worst-case scenarios when the pages are not ++ * be compressed and direntries are of the maximum size. ++ * ++ * Note, data, which may be stored in inodes is budgeted separately, so ++ * it is not included into 'c->inode_budget'. ++ */ ++ c->page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE; ++ c->inode_budget = UBIFS_INO_NODE_SZ; ++ c->dent_budget = UBIFS_MAX_DENT_NODE_SZ; ++ ++ /* ++ * When the amount of flash space used by buds becomes ++ * 'c->max_bud_bytes', UBIFS just blocks all writers and starts commit. ++ * The writers are unblocked when the commit is finished. 
To avoid ++ * writers to be blocked UBIFS initiates background commit in advance, ++ * when number of bud bytes becomes above the limit defined below. ++ */ ++ c->bg_bud_bytes = (c->max_bud_bytes * 13) >> 4; ++ ++ /* ++ * Ensure minimum journal size. All the bytes in the journal heads are ++ * considered to be used, when calculating the current journal usage. ++ * Consequently, if the journal is too small, UBIFS will treat it as ++ * always full. ++ */ ++ tmp64 = (long long)(c->jhead_cnt + 1) * c->leb_size + 1; ++ if (c->bg_bud_bytes < tmp64) ++ c->bg_bud_bytes = tmp64; ++ if (c->max_bud_bytes < tmp64 + c->leb_size) ++ c->max_bud_bytes = tmp64 + c->leb_size; ++ ++ err = ubifs_calc_lpt_geom(c); ++ if (err) ++ return err; ++ ++ /* Initialize effective LEB size used in budgeting calculations */ ++ c->idx_leb_size = c->leb_size - c->max_idx_node_sz; ++ return 0; ++} ++ ++/* ++ * init_constants_master - initialize UBIFS constants. ++ * @c: UBIFS file-system description object ++ * ++ * This is a helper function which initializes various UBIFS constants after ++ * the master node has been read. It also checks various UBIFS parameters and ++ * makes sure they are all right. ++ */ ++static void init_constants_master(struct ubifs_info *c) ++{ ++ long long tmp64; ++ ++ c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); ++ c->report_rp_size = ubifs_reported_space(c, c->rp_size); ++ ++ /* ++ * Calculate total amount of FS blocks. This number is not used ++ * internally because it does not make much sense for UBIFS, but it is ++ * necessary to report something for the 'statfs()' call. ++ * ++ * Subtract the LEB reserved for GC, the LEB which is reserved for ++ * deletions, minimum LEBs for the index, and assume only one journal ++ * head is available. 
++ */ ++ tmp64 = c->main_lebs - 1 - 1 - MIN_INDEX_LEBS - c->jhead_cnt + 1; ++ tmp64 *= (long long)c->leb_size - c->leb_overhead; ++ tmp64 = ubifs_reported_space(c, tmp64); ++ c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT; ++} ++ ++/** ++ * take_gc_lnum - reserve GC LEB. ++ * @c: UBIFS file-system description object ++ * ++ * This function ensures that the LEB reserved for garbage collection is marked ++ * as "taken" in lprops. We also have to set free space to LEB size and dirty ++ * space to zero, because lprops may contain out-of-date information if the ++ * file-system was un-mounted before it has been committed. This function ++ * returns zero in case of success and a negative error code in case of ++ * failure. ++ */ ++static int take_gc_lnum(struct ubifs_info *c) ++{ ++ int err; ++ ++ if (c->gc_lnum == -1) { ++ ubifs_err("no LEB for GC"); ++ return -EINVAL; ++ } ++ ++ /* And we have to tell lprops that this LEB is taken */ ++ err = ubifs_change_one_lp(c, c->gc_lnum, c->leb_size, 0, ++ LPROPS_TAKEN, 0, 0); ++ return err; ++} ++ ++/** ++ * alloc_wbufs - allocate write-buffers. ++ * @c: UBIFS file-system description object ++ * ++ * This helper function allocates and initializes UBIFS write-buffers. Returns ++ * zero in case of success and %-ENOMEM in case of failure. ++ */ ++static int alloc_wbufs(struct ubifs_info *c) ++{ ++ int i, err; ++ ++ c->jheads = kzalloc(c->jhead_cnt * sizeof(struct ubifs_jhead), ++ GFP_KERNEL); ++ if (!c->jheads) ++ return -ENOMEM; ++ ++ /* Initialize journal heads */ ++ for (i = 0; i < c->jhead_cnt; i++) { ++ INIT_LIST_HEAD(&c->jheads[i].buds_list); ++ err = ubifs_wbuf_init(c, &c->jheads[i].wbuf); ++ if (err) ++ return err; ++ ++ c->jheads[i].wbuf.sync_callback = &bud_wbuf_callback; ++ c->jheads[i].wbuf.jhead = i; ++ } ++ ++ c->jheads[BASEHD].wbuf.dtype = UBI_SHORTTERM; ++ /* ++ * Garbage Collector head likely contains long-term data and ++ * does not need to be synchronized by timer. 
++ */ ++ c->jheads[GCHD].wbuf.dtype = UBI_LONGTERM; ++ c->jheads[GCHD].wbuf.timeout = 0; ++ ++ return 0; ++} ++ ++/** ++ * free_wbufs - free write-buffers. ++ * @c: UBIFS file-system description object ++ */ ++static void free_wbufs(struct ubifs_info *c) ++{ ++ int i; ++ ++ if (c->jheads) { ++ for (i = 0; i < c->jhead_cnt; i++) { ++ kfree(c->jheads[i].wbuf.buf); ++ kfree(c->jheads[i].wbuf.inodes); ++ } ++ kfree(c->jheads); ++ c->jheads = NULL; ++ } ++} ++ ++/** ++ * free_orphans - free orphans. ++ * @c: UBIFS file-system description object ++ */ ++static void free_orphans(struct ubifs_info *c) ++{ ++ struct ubifs_orphan *orph; ++ ++ while (c->orph_dnext) { ++ orph = c->orph_dnext; ++ c->orph_dnext = orph->dnext; ++ list_del(&orph->list); ++ kfree(orph); ++ } ++ ++ while (!list_empty(&c->orph_list)) { ++ orph = list_entry(c->orph_list.next, struct ubifs_orphan, list); ++ list_del(&orph->list); ++ kfree(orph); ++ dbg_err("orphan list not empty at unmount"); ++ } ++ ++ vfree(c->orph_buf); ++ c->orph_buf = NULL; ++} ++ ++/** ++ * free_buds - free per-bud objects. ++ * @c: UBIFS file-system description object ++ */ ++static void free_buds(struct ubifs_info *c) ++{ ++ struct rb_node *this = c->buds.rb_node; ++ struct ubifs_bud *bud; ++ ++ while (this) { ++ if (this->rb_left) ++ this = this->rb_left; ++ else if (this->rb_right) ++ this = this->rb_right; ++ else { ++ bud = rb_entry(this, struct ubifs_bud, rb); ++ this = rb_parent(this); ++ if (this) { ++ if (this->rb_left == &bud->rb) ++ this->rb_left = NULL; ++ else ++ this->rb_right = NULL; ++ } ++ kfree(bud); ++ } ++ } ++} ++ ++/** ++ * check_volume_empty - check if the UBI volume is empty. ++ * @c: UBIFS file-system description object ++ * ++ * This function checks if the UBIFS volume is empty by looking if its LEBs are ++ * mapped or not. The result of checking is stored in the @c->empty variable. ++ * Returns zero in case of success and a negative error code in case of ++ * failure. 
++ */ ++static int check_volume_empty(struct ubifs_info *c) ++{ ++ int lnum, err; ++ ++ c->empty = 1; ++ for (lnum = 0; lnum < c->leb_cnt; lnum++) { ++ err = ubi_is_mapped(c->ubi, lnum); ++ if (unlikely(err < 0)) ++ return err; ++ if (err == 1) { ++ c->empty = 0; ++ break; ++ } ++ ++ cond_resched(); ++ } ++ ++ return 0; ++} ++ ++/* ++ * UBIFS mount options. ++ * ++ * Opt_fast_unmount: do not run a journal commit before un-mounting ++ * Opt_norm_unmount: run a journal commit before un-mounting ++ * Opt_bulk_read: enable bulk-reads ++ * Opt_no_bulk_read: disable bulk-reads ++ * Opt_chk_data_crc: check CRCs when reading data nodes ++ * Opt_no_chk_data_crc: do not check CRCs when reading data nodes ++ * Opt_override_compr: override default compressor ++ * Opt_err: just end of array marker ++ */ ++enum { ++ Opt_fast_unmount, ++ Opt_norm_unmount, ++ Opt_bulk_read, ++ Opt_no_bulk_read, ++ Opt_chk_data_crc, ++ Opt_no_chk_data_crc, ++ Opt_override_compr, ++ Opt_err, ++}; ++ ++static match_table_t tokens = { ++ {Opt_fast_unmount, "fast_unmount"}, ++ {Opt_norm_unmount, "norm_unmount"}, ++ {Opt_bulk_read, "bulk_read"}, ++ {Opt_no_bulk_read, "no_bulk_read"}, ++ {Opt_chk_data_crc, "chk_data_crc"}, ++ {Opt_no_chk_data_crc, "no_chk_data_crc"}, ++ {Opt_override_compr, "compr=%s"}, ++ {Opt_err, NULL}, ++}; ++ ++/** ++ * ubifs_parse_options - parse mount parameters. ++ * @c: UBIFS file-system description object ++ * @options: parameters to parse ++ * @is_remount: non-zero if this is FS re-mount ++ * ++ * This function parses UBIFS mount options and returns zero in case success ++ * and a negative error code in case of failure. 
++ */ ++static int ubifs_parse_options(struct ubifs_info *c, char *options, ++ int is_remount) ++{ ++ char *p; ++ substring_t args[MAX_OPT_ARGS]; ++ ++ if (!options) ++ return 0; ++ ++ while ((p = strsep(&options, ","))) { ++ int token; ++ ++ if (!*p) ++ continue; ++ ++ token = match_token(p, tokens, args); ++ switch (token) { ++ /* ++ * %Opt_fast_unmount and %Opt_norm_unmount options are ignored. ++ * We accept them in order to be backward-compatible. But this ++ * should be removed at some point. ++ */ ++ case Opt_fast_unmount: ++ c->mount_opts.unmount_mode = 2; ++ break; ++ case Opt_norm_unmount: ++ c->mount_opts.unmount_mode = 1; ++ break; ++ case Opt_bulk_read: ++ c->mount_opts.bulk_read = 2; ++ c->bulk_read = 1; ++ break; ++ case Opt_no_bulk_read: ++ c->mount_opts.bulk_read = 1; ++ c->bulk_read = 0; ++ break; ++ case Opt_chk_data_crc: ++ c->mount_opts.chk_data_crc = 2; ++ c->no_chk_data_crc = 0; ++ break; ++ case Opt_no_chk_data_crc: ++ c->mount_opts.chk_data_crc = 1; ++ c->no_chk_data_crc = 1; ++ break; ++ case Opt_override_compr: ++ { ++ char *name = match_strdup(&args[0]); ++ ++ if (!name) ++ return -ENOMEM; ++ if (!strcmp(name, "none")) ++ c->mount_opts.compr_type = UBIFS_COMPR_NONE; ++ else if (!strcmp(name, "lzo")) ++ c->mount_opts.compr_type = UBIFS_COMPR_LZO; ++ else if (!strcmp(name, "zlib")) ++ c->mount_opts.compr_type = UBIFS_COMPR_ZLIB; ++ else { ++ ubifs_err("unknown compressor \"%s\"", name); ++ kfree(name); ++ return -EINVAL; ++ } ++ kfree(name); ++ c->mount_opts.override_compr = 1; ++ c->default_compr = c->mount_opts.compr_type; ++ break; ++ } ++ default: ++ ubifs_err("unrecognized mount option \"%s\" " ++ "or missing value", p); ++ return -EINVAL; ++ } ++ } ++ ++ return 0; ++} ++ ++/** ++ * destroy_journal - destroy journal data structures. ++ * @c: UBIFS file-system description object ++ * ++ * This function destroys journal data structures including those that may have ++ * been created by recovery functions. 
 */
static void destroy_journal(struct ubifs_info *c)
{
	/* Drain the list of unclean LEBs, freeing every entry */
	while (!list_empty(&c->unclean_leb_list)) {
		struct ubifs_unclean_leb *ucleb;

		ucleb = list_entry(c->unclean_leb_list.next,
				   struct ubifs_unclean_leb, list);
		list_del(&ucleb->list);
		kfree(ucleb);
	}
	/* Same for the list of old buds */
	while (!list_empty(&c->old_buds)) {
		struct ubifs_bud *bud;

		bud = list_entry(c->old_buds.next, struct ubifs_bud, list);
		list_del(&bud->list);
		kfree(bud);
	}
	ubifs_destroy_idx_gc(c);
	ubifs_destroy_size_tree(c);
	ubifs_tnc_close(c);
	free_buds(c);
}

/**
 * bu_init - initialize bulk-read information.
 * @c: UBIFS file-system description object
 *
 * This function allocates the bulk-read buffer @c->bu.buf unless it already
 * exists. If the allocation of @c->max_bu_buf_len bytes fails, it is retried
 * once with the smaller %UBIFS_KMALLOC_OK size; if that fails too, bulk-read
 * is switched off instead of failing the caller.
 */
static void bu_init(struct ubifs_info *c)
{
	ubifs_assert(c->bulk_read == 1);

	if (c->bu.buf)
		return; /* Already initialized */

again:
	/* __GFP_NOWARN: a failure is handled here, no need for a warning */
	c->bu.buf = kmalloc(c->max_bu_buf_len, GFP_KERNEL | __GFP_NOWARN);
	if (!c->bu.buf) {
		if (c->max_bu_buf_len > UBIFS_KMALLOC_OK) {
			/* Shrink the request and try once more */
			c->max_bu_buf_len = UBIFS_KMALLOC_OK;
			goto again;
		}

		/* Just disable bulk-read */
		ubifs_warn("Cannot allocate %d bytes of memory for bulk-read, "
			   "disabling it", c->max_bu_buf_len);
		c->mount_opts.bulk_read = 1;
		c->bulk_read = 0;
		return;
	}
}

/**
 * check_free_space - check if there is enough free space to mount.
 * @c: UBIFS file-system description object
 *
 * This function makes sure UBIFS has enough free space to be mounted in
 * read/write mode. UBIFS must always have some free space to allow deletions.
 * Returns zero if there is enough space and %-ENOSPC otherwise.
 */
static int check_free_space(struct ubifs_info *c)
{
	ubifs_assert(c->dark_wm > 0);
	/* Free plus dirty space must reach at least the dark watermark */
	if (c->lst.total_free + c->lst.total_dirty < c->dark_wm) {
		ubifs_err("insufficient free space to mount in read/write mode");
		dbg_dump_budg(c);
		dbg_dump_lprops(c);
		return -ENOSPC;
	}
	return 0;
}

/**
 * mount_ubifs - mount UBIFS file-system.
 * @c: UBIFS file-system description object
 *
 * This function mounts UBIFS file system.
Returns zero in case of success and ++ * a negative error code in case of failure. ++ * ++ * Note, the function does not de-allocate resources it it fails half way ++ * through, and the caller has to do this instead. ++ */ ++static int mount_ubifs(struct ubifs_info *c) ++{ ++ struct super_block *sb = c->vfs_sb; ++ int err, mounted_read_only = (sb->s_flags & MS_RDONLY); ++ long long x; ++ size_t sz; ++ ++ err = init_constants_early(c); ++ if (err) ++ return err; ++ ++ err = ubifs_debugging_init(c); ++ if (err) ++ return err; ++ ++ err = check_volume_empty(c); ++ if (err) ++ goto out_free; ++ ++ if (c->empty && (mounted_read_only || c->ro_media)) { ++ /* ++ * This UBI volume is empty, and read-only, or the file system ++ * is mounted read-only - we cannot format it. ++ */ ++ ubifs_err("can't format empty UBI volume: read-only %s", ++ c->ro_media ? "UBI volume" : "mount"); ++ err = -EROFS; ++ goto out_free; ++ } ++ ++ if (c->ro_media && !mounted_read_only) { ++ ubifs_err("cannot mount read-write - read-only media"); ++ err = -EROFS; ++ goto out_free; ++ } ++ ++ /* ++ * The requirement for the buffer is that it should fit indexing B-tree ++ * height amount of integers. We assume the height if the TNC tree will ++ * never exceed 64. ++ */ ++ err = -ENOMEM; ++ c->bottom_up_buf = kmalloc(BOTTOM_UP_HEIGHT * sizeof(int), GFP_KERNEL); ++ if (!c->bottom_up_buf) ++ goto out_free; ++ ++ c->sbuf = vmalloc(c->leb_size); ++ if (!c->sbuf) ++ goto out_free; ++ ++ if (!mounted_read_only) { ++ c->ileb_buf = vmalloc(c->leb_size); ++ if (!c->ileb_buf) ++ goto out_free; ++ } ++ ++ if (c->bulk_read == 1) ++ bu_init(c); ++ ++ /* ++ * We have to check all CRCs, even for data nodes, when we mount the FS ++ * (specifically, when we are replaying). ++ */ ++ c->always_chk_crc = 1; ++ ++ err = ubifs_read_superblock(c); ++ if (err) ++ goto out_free; ++ ++ /* ++ * Make sure the compressor which is set as default in the superblock ++ * or overridden by mount options is actually compiled in. 
++ */ ++ if (!ubifs_compr_present(c->default_compr)) { ++ ubifs_err("'compressor \"%s\" is not compiled in", ++ ubifs_compr_name(c->default_compr)); ++ goto out_free; ++ } ++ ++ err = init_constants_sb(c); ++ if (err) ++ goto out_free; ++ ++ sz = ALIGN(c->max_idx_node_sz, c->min_io_size); ++ sz = ALIGN(sz + c->max_idx_node_sz, c->min_io_size); ++ c->cbuf = kmalloc(sz, GFP_NOFS); ++ if (!c->cbuf) { ++ err = -ENOMEM; ++ goto out_free; ++ } ++ ++ sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id); ++ if (!mounted_read_only) { ++ err = alloc_wbufs(c); ++ if (err) ++ goto out_cbuf; ++ ++ /* Create background thread */ ++ c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name); ++ if (IS_ERR(c->bgt)) { ++ err = PTR_ERR(c->bgt); ++ c->bgt = NULL; ++ ubifs_err("cannot spawn \"%s\", error %d", ++ c->bgt_name, err); ++ goto out_wbufs; ++ } ++ wake_up_process(c->bgt); ++ } ++ ++ err = ubifs_read_master(c); ++ if (err) ++ goto out_master; ++ ++ init_constants_master(c); ++ ++ if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) { ++ ubifs_msg("recovery needed"); ++ c->need_recovery = 1; ++ if (!mounted_read_only) { ++ err = ubifs_recover_inl_heads(c, c->sbuf); ++ if (err) ++ goto out_master; ++ } ++ } else if (!mounted_read_only) { ++ /* ++ * Set the "dirty" flag so that if we reboot uncleanly we ++ * will notice this immediately on the next mount. 
++ */ ++ c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY); ++ err = ubifs_write_master(c); ++ if (err) ++ goto out_master; ++ } ++ ++ err = ubifs_lpt_init(c, 1, !mounted_read_only); ++ if (err) ++ goto out_lpt; ++ ++ err = dbg_check_idx_size(c, c->old_idx_sz); ++ if (err) ++ goto out_lpt; ++ ++ err = ubifs_replay_journal(c); ++ if (err) ++ goto out_journal; ++ ++ err = ubifs_mount_orphans(c, c->need_recovery, mounted_read_only); ++ if (err) ++ goto out_orphans; ++ ++ if (!mounted_read_only) { ++ int lnum; ++ ++ err = check_free_space(c); ++ if (err) ++ goto out_orphans; ++ ++ /* Check for enough log space */ ++ lnum = c->lhead_lnum + 1; ++ if (lnum >= UBIFS_LOG_LNUM + c->log_lebs) ++ lnum = UBIFS_LOG_LNUM; ++ if (lnum == c->ltail_lnum) { ++ err = ubifs_consolidate_log(c); ++ if (err) ++ goto out_orphans; ++ } ++ ++ if (c->need_recovery) { ++ err = ubifs_recover_size(c); ++ if (err) ++ goto out_orphans; ++ err = ubifs_rcvry_gc_commit(c); ++ } else { ++ err = take_gc_lnum(c); ++ if (err) ++ goto out_orphans; ++ ++ /* ++ * GC LEB may contain garbage if there was an unclean ++ * reboot, and it should be un-mapped. ++ */ ++ err = ubifs_leb_unmap(c, c->gc_lnum); ++ if (err) ++ return err; ++ } ++ ++ err = dbg_check_lprops(c); ++ if (err) ++ goto out_orphans; ++ } else if (c->need_recovery) { ++ err = ubifs_recover_size(c); ++ if (err) ++ goto out_orphans; ++ } else { ++ /* ++ * Even if we mount read-only, we have to set space in GC LEB ++ * to proper value because this affects UBIFS free space ++ * reporting. We do not want to have a situation when ++ * re-mounting from R/O to R/W changes amount of free space. 
++ */ ++ err = take_gc_lnum(c); ++ if (err) ++ goto out_orphans; ++ } ++ ++ spin_lock(&ubifs_infos_lock); ++ list_add_tail(&c->infos_list, &ubifs_infos); ++ spin_unlock(&ubifs_infos_lock); ++ ++ if (c->need_recovery) { ++ if (mounted_read_only) ++ ubifs_msg("recovery deferred"); ++ else { ++ c->need_recovery = 0; ++ ubifs_msg("recovery completed"); ++ /* ++ * GC LEB has to be empty and taken at this point. But ++ * the journal head LEBs may also be accounted as ++ * "empty taken" if they are empty. ++ */ ++ ubifs_assert(c->lst.taken_empty_lebs > 0); ++ } ++ } else ++ ubifs_assert(c->lst.taken_empty_lebs > 0); ++ ++ err = dbg_check_filesystem(c); ++ if (err) ++ goto out_infos; ++ ++ err = dbg_debugfs_init_fs(c); ++ if (err) ++ goto out_infos; ++ ++ c->always_chk_crc = 0; ++ ++ ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"", ++ c->vi.ubi_num, c->vi.vol_id, c->vi.name); ++ if (mounted_read_only) ++ ubifs_msg("mounted read-only"); ++ x = (long long)c->main_lebs * c->leb_size; ++ ubifs_msg("file system size: %lld bytes (%lld KiB, %lld MiB, %d " ++ "LEBs)", x, x >> 10, x >> 20, c->main_lebs); ++ x = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes; ++ ubifs_msg("journal size: %lld bytes (%lld KiB, %lld MiB, %d " ++ "LEBs)", x, x >> 10, x >> 20, c->log_lebs + c->max_bud_cnt); ++ ubifs_msg("media format: w%d/r%d (latest is w%d/r%d)", ++ c->fmt_version, c->ro_compat_version, ++ UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION); ++ ubifs_msg("default compressor: %s", ubifs_compr_name(c->default_compr)); ++ ubifs_msg("reserved for root: %llu bytes (%llu KiB)", ++ c->report_rp_size, c->report_rp_size >> 10); ++ ++ dbg_msg("compiled on: " __DATE__ " at " __TIME__); ++ dbg_msg("min. 
I/O unit size: %d bytes", c->min_io_size); ++ dbg_msg("LEB size: %d bytes (%d KiB)", ++ c->leb_size, c->leb_size >> 10); ++ dbg_msg("data journal heads: %d", ++ c->jhead_cnt - NONDATA_JHEADS_CNT); ++ dbg_msg("UUID: %02X%02X%02X%02X-%02X%02X" ++ "-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X", ++ c->uuid[0], c->uuid[1], c->uuid[2], c->uuid[3], ++ c->uuid[4], c->uuid[5], c->uuid[6], c->uuid[7], ++ c->uuid[8], c->uuid[9], c->uuid[10], c->uuid[11], ++ c->uuid[12], c->uuid[13], c->uuid[14], c->uuid[15]); ++ dbg_msg("big_lpt %d", c->big_lpt); ++ dbg_msg("log LEBs: %d (%d - %d)", ++ c->log_lebs, UBIFS_LOG_LNUM, c->log_last); ++ dbg_msg("LPT area LEBs: %d (%d - %d)", ++ c->lpt_lebs, c->lpt_first, c->lpt_last); ++ dbg_msg("orphan area LEBs: %d (%d - %d)", ++ c->orph_lebs, c->orph_first, c->orph_last); ++ dbg_msg("main area LEBs: %d (%d - %d)", ++ c->main_lebs, c->main_first, c->leb_cnt - 1); ++ dbg_msg("index LEBs: %d", c->lst.idx_lebs); ++ dbg_msg("total index bytes: %lld (%lld KiB, %lld MiB)", ++ c->old_idx_sz, c->old_idx_sz >> 10, c->old_idx_sz >> 20); ++ dbg_msg("key hash type: %d", c->key_hash_type); ++ dbg_msg("tree fanout: %d", c->fanout); ++ dbg_msg("reserved GC LEB: %d", c->gc_lnum); ++ dbg_msg("first main LEB: %d", c->main_first); ++ dbg_msg("max. znode size %d", c->max_znode_sz); ++ dbg_msg("max. index node size %d", c->max_idx_node_sz); ++ dbg_msg("node sizes: data %zu, inode %zu, dentry %zu", ++ UBIFS_DATA_NODE_SZ, UBIFS_INO_NODE_SZ, UBIFS_DENT_NODE_SZ); ++ dbg_msg("node sizes: trun %zu, sb %zu, master %zu", ++ UBIFS_TRUN_NODE_SZ, UBIFS_SB_NODE_SZ, UBIFS_MST_NODE_SZ); ++ dbg_msg("node sizes: ref %zu, cmt. start %zu, orph %zu", ++ UBIFS_REF_NODE_SZ, UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ); ++ dbg_msg("max. 
node sizes: data %zu, inode %zu dentry %zu", ++ UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ, ++ UBIFS_MAX_DENT_NODE_SZ); ++ dbg_msg("dead watermark: %d", c->dead_wm); ++ dbg_msg("dark watermark: %d", c->dark_wm); ++ dbg_msg("LEB overhead: %d", c->leb_overhead); ++ x = (long long)c->main_lebs * c->dark_wm; ++ dbg_msg("max. dark space: %lld (%lld KiB, %lld MiB)", ++ x, x >> 10, x >> 20); ++ dbg_msg("maximum bud bytes: %lld (%lld KiB, %lld MiB)", ++ c->max_bud_bytes, c->max_bud_bytes >> 10, ++ c->max_bud_bytes >> 20); ++ dbg_msg("BG commit bud bytes: %lld (%lld KiB, %lld MiB)", ++ c->bg_bud_bytes, c->bg_bud_bytes >> 10, ++ c->bg_bud_bytes >> 20); ++ dbg_msg("current bud bytes %lld (%lld KiB, %lld MiB)", ++ c->bud_bytes, c->bud_bytes >> 10, c->bud_bytes >> 20); ++ dbg_msg("max. seq. number: %llu", c->max_sqnum); ++ dbg_msg("commit number: %llu", c->cmt_no); ++ ++ return 0; ++ ++out_infos: ++ spin_lock(&ubifs_infos_lock); ++ list_del(&c->infos_list); ++ spin_unlock(&ubifs_infos_lock); ++out_orphans: ++ free_orphans(c); ++out_journal: ++ destroy_journal(c); ++out_lpt: ++ ubifs_lpt_free(c, 0); ++out_master: ++ kfree(c->mst_node); ++ kfree(c->rcvrd_mst_node); ++ if (c->bgt) ++ kthread_stop(c->bgt); ++out_wbufs: ++ free_wbufs(c); ++out_cbuf: ++ kfree(c->cbuf); ++out_free: ++ kfree(c->bu.buf); ++ vfree(c->ileb_buf); ++ vfree(c->sbuf); ++ kfree(c->bottom_up_buf); ++ ubifs_debugging_exit(c); ++ return err; ++} ++ ++/** ++ * ubifs_umount - un-mount UBIFS file-system. ++ * @c: UBIFS file-system description object ++ * ++ * Note, this function is called to free allocated resourced when un-mounting, ++ * as well as free resources when an error occurred while we were half way ++ * through mounting (error path cleanup function). So it has to make sure the ++ * resource was actually allocated before freeing it. 
 */
static void ubifs_umount(struct ubifs_info *c)
{
	dbg_gen("un-mounting UBI device %d, volume %d", c->vi.ubi_num,
		c->vi.vol_id);

	dbg_debugfs_exit_fs(c);
	/* Take this file-system off the global 'ubifs_infos' list */
	spin_lock(&ubifs_infos_lock);
	list_del(&c->infos_list);
	spin_unlock(&ubifs_infos_lock);

	if (c->bgt)
		kthread_stop(c->bgt);

	destroy_journal(c);
	free_wbufs(c);
	free_orphans(c);
	ubifs_lpt_free(c, 0);

	kfree(c->cbuf);
	kfree(c->rcvrd_mst_node);
	kfree(c->mst_node);
	kfree(c->bu.buf);
	vfree(c->ileb_buf);
	vfree(c->sbuf);
	kfree(c->bottom_up_buf);
	ubifs_debugging_exit(c);
}

/**
 * ubifs_remount_rw - re-mount in read-write mode.
 * @c: UBIFS file-system description object
 *
 * UBIFS avoids allocating many unnecessary resources when mounted in read-only
 * mode. This function allocates the needed resources and re-mounts UBIFS in
 * read-write mode. Returns zero in case of success and a negative error code
 * in case of failure, in which case the resources allocated here are released
 * again.
 */
static int ubifs_remount_rw(struct ubifs_info *c)
{
	int err, lnum;

	if (c->rw_incompat) {
		ubifs_err("the file-system is not R/W-compatible");
		ubifs_msg("on-flash format version is w%d/r%d, but software "
			  "only supports up to version w%d/r%d", c->fmt_version,
			  c->ro_compat_version, UBIFS_FORMAT_VERSION,
			  UBIFS_RO_COMPAT_VERSION);
		return -EROFS;
	}

	mutex_lock(&c->umount_mutex);
	dbg_save_space_info(c);
	/* Both flags are cleared again on every exit path below */
	c->remounting_rw = 1;
	c->always_chk_crc = 1;

	err = check_free_space(c);
	if (err)
		goto out;

	/* LEB count differs from the recorded one - update the superblock */
	if (c->old_leb_cnt != c->leb_cnt) {
		struct ubifs_sb_node *sup;

		sup = ubifs_read_sb_node(c);
		if (IS_ERR(sup)) {
			err = PTR_ERR(sup);
			goto out;
		}
		sup->leb_cnt = cpu_to_le32(c->leb_cnt);
		err = ubifs_write_sb_node(c, sup);
		if (err)
			goto out;
	}

	if (c->need_recovery) {
		ubifs_msg("completing deferred recovery");
		err = ubifs_write_rcvrd_mst_node(c);
		if (err)
			goto out;
		err = ubifs_recover_size(c);
		if (err)
			goto out;
		err = ubifs_clean_lebs(c, c->sbuf);
		if (err)
			goto out;
		err = ubifs_recover_inl_heads(c, c->sbuf);
		if (err)
			goto out;
	} else {
		/* A readonly mount is not allowed to have orphans */
		ubifs_assert(c->tot_orphans == 0);
		err = ubifs_clear_orphans(c);
		if (err)
			goto out;
	}

	/* Mark the master node dirty, unless it already is */
	if (!(c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY))) {
		c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
		err = ubifs_write_master(c);
		if (err)
			goto out;
	}

	c->ileb_buf = vmalloc(c->leb_size);
	if (!c->ileb_buf) {
		err = -ENOMEM;
		goto out;
	}

	err = ubifs_lpt_init(c, 0, 1);
	if (err)
		goto out;

	err = alloc_wbufs(c);
	if (err)
		goto out;

	ubifs_create_buds_lists(c);

	/* Create background thread */
	c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name);
	if (IS_ERR(c->bgt)) {
		err = PTR_ERR(c->bgt);
		c->bgt = NULL;
		ubifs_err("cannot spawn \"%s\", error %d",
			  c->bgt_name, err);
		goto out;
	}
	wake_up_process(c->bgt);

	c->orph_buf = vmalloc(c->leb_size);
	if (!c->orph_buf) {
		err = -ENOMEM;
		goto out;
	}

	/* Check for enough log space */
	lnum = c->lhead_lnum + 1;
	if (lnum >= UBIFS_LOG_LNUM + c->log_lebs)
		lnum = UBIFS_LOG_LNUM;
	if (lnum == c->ltail_lnum) {
		err = ubifs_consolidate_log(c);
		if (err)
			goto out;
	}

	if (c->need_recovery)
		err = ubifs_rcvry_gc_commit(c);
	else
		err = ubifs_leb_unmap(c, c->gc_lnum);
	if (err)
		goto out;

	if (c->need_recovery) {
		c->need_recovery = 0;
		ubifs_msg("deferred recovery completed");
	}

	dbg_gen("re-mounted read-write");
	c->vfs_sb->s_flags &= ~MS_RDONLY;
	c->remounting_rw = 0;
	c->always_chk_crc = 0;
	err = dbg_check_space_info(c);
	mutex_unlock(&c->umount_mutex);
	return err;

out:
	/*
	 * Release whatever was set up above and reset the pointers so that
	 * later cleanup does not see stale values.
	 */
	vfree(c->orph_buf);
	c->orph_buf = NULL;
	if (c->bgt) {
		kthread_stop(c->bgt);
		c->bgt = NULL;
	}
	free_wbufs(c);
	vfree(c->ileb_buf);
	c->ileb_buf = NULL;
	ubifs_lpt_free(c, 1);
	c->remounting_rw = 0;
	c->always_chk_crc = 0;
	mutex_unlock(&c->umount_mutex);
	return err;
}

/**
 * ubifs_remount_ro - re-mount in read-only mode.
 * @c: UBIFS file-system description object
 *
 * We assume VFS has stopped writing. Possibly the background thread could be
 * running a commit, however kthread_stop will wait in that case.
 */
static void ubifs_remount_ro(struct ubifs_info *c)
{
	int i, err;

	ubifs_assert(!c->need_recovery);
	ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY));

	mutex_lock(&c->umount_mutex);
	if (c->bgt) {
		kthread_stop(c->bgt);
		c->bgt = NULL;
	}

	dbg_save_space_info(c);

	/* Synchronize the write-buffer of every journal head, stop its timer */
	for (i = 0; i < c->jhead_cnt; i++) {
		ubifs_wbuf_sync(&c->jheads[i].wbuf);
		del_timer_sync(&c->jheads[i].wbuf.timer);
	}

	/* Write a master node with "dirty" cleared and "no orphans" set */
	c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY);
	c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS);
	c->mst_node->gc_lnum = cpu_to_le32(c->gc_lnum);
	err = ubifs_write_master(c);
	if (err)
		ubifs_ro_mode(c, err);

	/* Release the resources which are only held for read-write mounts */
	free_wbufs(c);
	vfree(c->orph_buf);
	c->orph_buf = NULL;
	vfree(c->ileb_buf);
	c->ileb_buf = NULL;
	ubifs_lpt_free(c, 1);
	err = dbg_check_space_info(c);
	if (err)
		ubifs_ro_mode(c, err);
	mutex_unlock(&c->umount_mutex);
}

/*
 * VFS '->put_super()' callback: finish a read-write mount cleanly (stop the
 * background thread, synchronize write-buffers, write the master node), then
 * release all resources including the file-system description object itself.
 */
static void ubifs_put_super(struct super_block *sb)
{
	int i;
	struct ubifs_info *c = sb->s_fs_info;

	ubifs_msg("un-mount UBI device %d, volume %d", c->vi.ubi_num,
		  c->vi.vol_id);
	/*
	 * The following asserts are only valid if there has not been a failure
	 * of the media. For example, there will be dirty inodes if we failed
	 * to write them back because of I/O errors.
	 */
	ubifs_assert(atomic_long_read(&c->dirty_pg_cnt) == 0);
	ubifs_assert(c->budg_idx_growth == 0);
	ubifs_assert(c->budg_dd_growth == 0);
	ubifs_assert(c->budg_data_growth == 0);

	/*
	 * The 'c->umount_mutex' prevents races between UBIFS memory shrinker
	 * and file system un-mount. Namely, it prevents the shrinker from
	 * picking this superblock for shrinking - it will be just skipped if
	 * the mutex is locked.
	 */
	mutex_lock(&c->umount_mutex);
	if (!(c->vfs_sb->s_flags & MS_RDONLY)) {
		/*
		 * First of all kill the background thread to make sure it does
		 * not interfere with un-mounting and freeing resources.
		 */
		if (c->bgt) {
			kthread_stop(c->bgt);
			c->bgt = NULL;
		}

		/* Synchronize write-buffers */
		if (c->jheads)
			for (i = 0; i < c->jhead_cnt; i++) {
				ubifs_wbuf_sync(&c->jheads[i].wbuf);
				del_timer_sync(&c->jheads[i].wbuf.timer);
			}

		/*
		 * On fatal errors c->ro_media is set to 1, in which case we do
		 * not write the master node.
		 */
		if (!c->ro_media) {
			/*
			 * We are being cleanly unmounted which means the
			 * orphans were killed - indicate this in the master
			 * node. Also save the reserved GC LEB number.
			 */
			int err;

			c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY);
			c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS);
			c->mst_node->gc_lnum = cpu_to_le32(c->gc_lnum);
			err = ubifs_write_master(c);
			if (err)
				/*
				 * Recovery will attempt to fix the master area
				 * next mount, so we just print a message and
				 * continue to unmount normally.
				 */
				ubifs_err("failed to write master node, "
					  "error %d", err);
		}
	}

	ubifs_umount(c);
	bdi_destroy(&c->bdi);
	ubi_close_volume(c->ubi);
	mutex_unlock(&c->umount_mutex);
	kfree(c);
}

/*
 * VFS '->remount_fs()' callback: re-parse the mount options and switch
 * between read-only and read-write mode if the %MS_RDONLY flag changed.
 * Returns zero in case of success and a negative error code in case of
 * failure.
 */
static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
{
	int err;
	struct ubifs_info *c = sb->s_fs_info;

	dbg_gen("old flags %#lx, new flags %#x", sb->s_flags, *flags);

	err = ubifs_parse_options(c, data, 1);
	if (err) {
		ubifs_err("invalid or unknown remount parameter");
		return err;
	}

	if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) {
		/* R/O -> R/W transition */
		if (c->ro_media) {
			ubifs_msg("cannot re-mount due to prior errors");
			return -EROFS;
		}
		err = ubifs_remount_rw(c);
		if (err)
			return err;
	} else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) {
		/* R/W -> R/O transition */
		if (c->ro_media) {
			ubifs_msg("cannot re-mount due to prior errors");
			return -EROFS;
		}
		ubifs_remount_ro(c);
	}

	/* The options may have toggled bulk-read - follow the new setting */
	if (c->bulk_read == 1)
		bu_init(c);
	else {
		dbg_gen("disable bulk-read");
		kfree(c->bu.buf);
		c->bu.buf = NULL;
	}

	ubifs_assert(c->lst.taken_empty_lebs > 0);
	return 0;
}

/* Super-block operations UBIFS installs in 'sb->s_op' */
const struct super_operations ubifs_super_operations = {
	.read_inode = ubifs_read_inode,
	.alloc_inode = ubifs_alloc_inode,
	.destroy_inode = ubifs_destroy_inode,
	.put_super = ubifs_put_super,
	.write_inode = ubifs_write_inode,
	.delete_inode = ubifs_delete_inode,
	.statfs = ubifs_statfs,
	.dirty_inode = ubifs_dirty_inode,
	.remount_fs = ubifs_remount_fs,
	.show_options = ubifs_show_options,
	.sync_fs = ubifs_sync_fs,
};

/**
 * open_ubi - parse UBI device name string and open the UBI device.
++ * @name: UBI volume name ++ * @mode: UBI volume open mode ++ * ++ * There are several ways to specify UBI volumes when mounting UBIFS: ++ * o ubiX_Y - UBI device number X, volume Y; ++ * o ubiY - UBI device number 0, volume Y; ++ * o ubiX:NAME - mount UBI device X, volume with name NAME; ++ * o ubi:NAME - mount UBI device 0, volume with name NAME. ++ * ++ * Alternative '!' separator may be used instead of ':' (because some shells ++ * like busybox may interpret ':' as an NFS host name separator). This function ++ * returns ubi volume object in case of success and a negative error code in ++ * case of failure. ++ */ ++static struct ubi_volume_desc *open_ubi(const char *name, int mode) ++{ ++ int dev, vol; ++ char *endptr; ++ ++ if (name[0] != 'u' || name[1] != 'b' || name[2] != 'i') ++ return ERR_PTR(-EINVAL); ++ ++ /* ubi:NAME method */ ++ if ((name[3] == ':' || name[3] == '!') && name[4] != '\0') ++ return ubi_open_volume_nm(0, name + 4, mode); ++ ++ if (!isdigit(name[3])) ++ return ERR_PTR(-EINVAL); ++ ++ dev = simple_strtoul(name + 3, &endptr, 0); ++ ++ /* ubiY method */ ++ if (*endptr == '\0') ++ return ubi_open_volume(0, dev, mode); ++ ++ /* ubiX_Y method */ ++ if (*endptr == '_' && isdigit(endptr[1])) { ++ vol = simple_strtoul(endptr + 1, &endptr, 0); ++ if (*endptr != '\0') ++ return ERR_PTR(-EINVAL); ++ return ubi_open_volume(dev, vol, mode); ++ } ++ ++ /* ubiX:NAME method */ ++ if ((*endptr == ':' || *endptr == '!') && endptr[1] != '\0') ++ return ubi_open_volume_nm(dev, ++endptr, mode); ++ ++ return ERR_PTR(-EINVAL); ++} ++ ++static int ubifs_fill_super(struct super_block *sb, void *data, int silent) ++{ ++ struct ubi_volume_desc *ubi = sb->s_fs_info; ++ struct ubifs_info *c; ++ struct inode *root; ++ int err; ++ ++ c = kzalloc(sizeof(struct ubifs_info), GFP_KERNEL); ++ if (!c) ++ return -ENOMEM; ++ ++ spin_lock_init(&c->cnt_lock); ++ spin_lock_init(&c->cs_lock); ++ spin_lock_init(&c->buds_lock); ++ spin_lock_init(&c->space_lock); ++ 
spin_lock_init(&c->orphan_lock); ++ init_rwsem(&c->commit_sem); ++ mutex_init(&c->lp_mutex); ++ mutex_init(&c->tnc_mutex); ++ mutex_init(&c->log_mutex); ++ mutex_init(&c->mst_mutex); ++ mutex_init(&c->umount_mutex); ++ mutex_init(&c->bu_mutex); ++ init_waitqueue_head(&c->cmt_wq); ++ c->buds = RB_ROOT; ++ c->old_idx = RB_ROOT; ++ c->size_tree = RB_ROOT; ++ c->orph_tree = RB_ROOT; ++ INIT_LIST_HEAD(&c->infos_list); ++ INIT_LIST_HEAD(&c->idx_gc); ++ INIT_LIST_HEAD(&c->replay_list); ++ INIT_LIST_HEAD(&c->replay_buds); ++ INIT_LIST_HEAD(&c->uncat_list); ++ INIT_LIST_HEAD(&c->empty_list); ++ INIT_LIST_HEAD(&c->freeable_list); ++ INIT_LIST_HEAD(&c->frdi_idx_list); ++ INIT_LIST_HEAD(&c->unclean_leb_list); ++ INIT_LIST_HEAD(&c->old_buds); ++ INIT_LIST_HEAD(&c->orph_list); ++ INIT_LIST_HEAD(&c->orph_new); ++ ++ c->highest_inum = UBIFS_FIRST_INO; ++ c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM; ++ ++ ubi_get_volume_info(ubi, &c->vi); ++ ubi_get_device_info(c->vi.ubi_num, &c->di); ++ ++ /* Re-open the UBI device in read-write mode */ ++ c->ubi = ubi_open_volume(c->vi.ubi_num, c->vi.vol_id, UBI_READWRITE); ++ if (IS_ERR(c->ubi)) { ++ err = PTR_ERR(c->ubi); ++ goto out_free; ++ } ++ ++ /* ++ * UBIFS provides 'backing_dev_info' in order to disable read-ahead. For ++ * UBIFS, I/O is not deferred, it is done immediately in readpage, ++ * which means the user would have to wait not just for their own I/O ++ * but the read-ahead I/O as well i.e. completely pointless. ++ * ++ * Read-ahead will be disabled because @c->bdi.ra_pages is 0. 
++ */ ++ c->bdi.capabilities = BDI_CAP_MAP_COPY; ++ c->bdi.unplug_io_fn = default_unplug_io_fn; ++ err = bdi_init(&c->bdi); ++ if (err) ++ goto out_close; ++ ++ err = ubifs_parse_options(c, data, 0); ++ if (err) ++ goto out_bdi; ++ ++ c->vfs_sb = sb; ++ ++ sb->s_fs_info = c; ++ sb->s_magic = UBIFS_SUPER_MAGIC; ++ sb->s_blocksize = UBIFS_BLOCK_SIZE; ++ sb->s_blocksize_bits = UBIFS_BLOCK_SHIFT; ++ sb->s_dev = c->vi.cdev; ++ sb->s_maxbytes = c->max_inode_sz = key_max_inode_size(c); ++ if (c->max_inode_sz > MAX_LFS_FILESIZE) ++ sb->s_maxbytes = c->max_inode_sz = MAX_LFS_FILESIZE; ++ sb->s_op = &ubifs_super_operations; ++ ++ mutex_lock(&c->umount_mutex); ++ err = mount_ubifs(c); ++ if (err) { ++ ubifs_assert(err < 0); ++ goto out_unlock; ++ } ++ ++ /* Read the root inode */ ++ root = ubifs_iget(sb, UBIFS_ROOT_INO); ++ if (IS_ERR(root)) { ++ err = PTR_ERR(root); ++ goto out_umount; ++ } ++ ++ sb->s_root = d_alloc_root(root); ++ if (!sb->s_root) ++ goto out_iput; ++ ++ mutex_unlock(&c->umount_mutex); ++ return 0; ++ ++out_iput: ++ iput(root); ++out_umount: ++ ubifs_umount(c); ++out_unlock: ++ mutex_unlock(&c->umount_mutex); ++out_bdi: ++ bdi_destroy(&c->bdi); ++out_close: ++ ubi_close_volume(c->ubi); ++out_free: ++ kfree(c); ++ return err; ++} ++ ++static int sb_test(struct super_block *sb, void *data) ++{ ++ dev_t *dev = data; ++ ++ return sb->s_dev == *dev; ++} ++ ++static int sb_set(struct super_block *sb, void *data) ++{ ++ dev_t *dev = data; ++ ++ sb->s_dev = *dev; ++ return 0; ++} ++ ++static int ubifs_get_sb(struct file_system_type *fs_type, int flags, ++ const char *name, void *data, struct vfsmount *mnt) ++{ ++ struct ubi_volume_desc *ubi; ++ struct ubi_volume_info vi; ++ struct super_block *sb; ++ int err; ++ ++ dbg_gen("name %s, flags %#x", name, flags); ++ ++ /* ++ * Get UBI device number and volume ID. Mount it read-only so far ++ * because this might be a new mount point, and UBI allows only one ++ * read-write user at a time. 
++ */ ++ ubi = open_ubi(name, UBI_READONLY); ++ if (IS_ERR(ubi)) { ++ ubifs_err("cannot open \"%s\", error %d", ++ name, (int)PTR_ERR(ubi)); ++ return PTR_ERR(ubi); ++ } ++ ubi_get_volume_info(ubi, &vi); ++ ++ dbg_gen("opened ubi%d_%d", vi.ubi_num, vi.vol_id); ++ ++ sb = sget(fs_type, &sb_test, &sb_set, &vi.cdev); ++ if (IS_ERR(sb)) { ++ err = PTR_ERR(sb); ++ goto out_close; ++ } ++ ++ if (sb->s_root) { ++ /* A new mount point for already mounted UBIFS */ ++ dbg_gen("this ubi volume is already mounted"); ++ if ((flags ^ sb->s_flags) & MS_RDONLY) { ++ err = -EBUSY; ++ goto out_deact; ++ } ++ } else { ++ sb->s_flags = flags; ++ /* ++ * Pass 'ubi' to 'fill_super()' in sb->s_fs_info where it is ++ * replaced by 'c'. ++ */ ++ sb->s_fs_info = ubi; ++ err = ubifs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0); ++ if (err) ++ goto out_deact; ++ /* We do not support atime */ ++ sb->s_flags |= MS_ACTIVE | MS_NOATIME; ++ } ++ ++ /* 'fill_super()' opens ubi again so we must close it here */ ++ ubi_close_volume(ubi); ++ ++ return simple_set_mnt(mnt, sb); ++ ++out_deact: ++ up_write(&sb->s_umount); ++ deactivate_super(sb); ++out_close: ++ ubi_close_volume(ubi); ++ return err; ++} ++ ++static void ubifs_kill_sb(struct super_block *sb) ++{ ++ generic_shutdown_super(sb); ++} ++ ++static struct file_system_type ubifs_fs_type = { ++ .name = "ubifs", ++ .owner = THIS_MODULE, ++ .get_sb = ubifs_get_sb, ++ .kill_sb = ubifs_kill_sb, ++ .fs_flags = FS_REQUIRES_DEV, ++}; ++ ++/* ++ * Inode slab cache constructor. 
++ */ ++static void inode_slab_ctor(struct kmem_cache *cachep, void *obj) ++{ ++ struct ubifs_inode *ui = obj; ++ inode_init_once(&ui->vfs_inode); ++} ++ ++static int __init ubifs_init(void) ++{ ++ int err; ++ ++ BUILD_BUG_ON(sizeof(struct ubifs_ch) != 24); ++ ++ /* Make sure node sizes are 8-byte aligned */ ++ BUILD_BUG_ON(UBIFS_CH_SZ & 7); ++ BUILD_BUG_ON(UBIFS_INO_NODE_SZ & 7); ++ BUILD_BUG_ON(UBIFS_DENT_NODE_SZ & 7); ++ BUILD_BUG_ON(UBIFS_XENT_NODE_SZ & 7); ++ BUILD_BUG_ON(UBIFS_DATA_NODE_SZ & 7); ++ BUILD_BUG_ON(UBIFS_TRUN_NODE_SZ & 7); ++ BUILD_BUG_ON(UBIFS_SB_NODE_SZ & 7); ++ BUILD_BUG_ON(UBIFS_MST_NODE_SZ & 7); ++ BUILD_BUG_ON(UBIFS_REF_NODE_SZ & 7); ++ BUILD_BUG_ON(UBIFS_CS_NODE_SZ & 7); ++ BUILD_BUG_ON(UBIFS_ORPH_NODE_SZ & 7); ++ ++ BUILD_BUG_ON(UBIFS_MAX_DENT_NODE_SZ & 7); ++ BUILD_BUG_ON(UBIFS_MAX_XENT_NODE_SZ & 7); ++ BUILD_BUG_ON(UBIFS_MAX_DATA_NODE_SZ & 7); ++ BUILD_BUG_ON(UBIFS_MAX_INO_NODE_SZ & 7); ++ BUILD_BUG_ON(UBIFS_MAX_NODE_SZ & 7); ++ BUILD_BUG_ON(MIN_WRITE_SZ & 7); ++ ++ /* Check min. node size */ ++ BUILD_BUG_ON(UBIFS_INO_NODE_SZ < MIN_WRITE_SZ); ++ BUILD_BUG_ON(UBIFS_DENT_NODE_SZ < MIN_WRITE_SZ); ++ BUILD_BUG_ON(UBIFS_XENT_NODE_SZ < MIN_WRITE_SZ); ++ BUILD_BUG_ON(UBIFS_TRUN_NODE_SZ < MIN_WRITE_SZ); ++ ++ BUILD_BUG_ON(UBIFS_MAX_DENT_NODE_SZ > UBIFS_MAX_NODE_SZ); ++ BUILD_BUG_ON(UBIFS_MAX_XENT_NODE_SZ > UBIFS_MAX_NODE_SZ); ++ BUILD_BUG_ON(UBIFS_MAX_DATA_NODE_SZ > UBIFS_MAX_NODE_SZ); ++ BUILD_BUG_ON(UBIFS_MAX_INO_NODE_SZ > UBIFS_MAX_NODE_SZ); ++ ++ /* Defined node sizes */ ++ BUILD_BUG_ON(UBIFS_SB_NODE_SZ != 4096); ++ BUILD_BUG_ON(UBIFS_MST_NODE_SZ != 512); ++ BUILD_BUG_ON(UBIFS_INO_NODE_SZ != 160); ++ BUILD_BUG_ON(UBIFS_REF_NODE_SZ != 64); ++ ++ /* ++ * We use 2 bit wide bit-fields to store compression type, which should ++ * be amended if more compressors are added. The bit-fields are: ++ * @compr_type in 'struct ubifs_inode', @default_compr in ++ * 'struct ubifs_info' and @compr_type in 'struct ubifs_mount_opts'. 
++ */ ++ BUILD_BUG_ON(UBIFS_COMPR_TYPES_CNT > 4); ++ ++ /* ++ * We require that PAGE_CACHE_SIZE is greater-than-or-equal-to ++ * UBIFS_BLOCK_SIZE. It is assumed that both are powers of 2. ++ */ ++ if (PAGE_CACHE_SIZE < UBIFS_BLOCK_SIZE) { ++ ubifs_err("VFS page cache size is %u bytes, but UBIFS requires" ++ " at least 4096 bytes", ++ (unsigned int)PAGE_CACHE_SIZE); ++ return -EINVAL; ++ } ++ ++ err = register_filesystem(&ubifs_fs_type); ++ if (err) { ++ ubifs_err("cannot register file system, error %d", err); ++ return err; ++ } ++ ++ err = -ENOMEM; ++ ubifs_inode_slab = kmem_cache_create("ubifs_inode_slab", ++ sizeof(struct ubifs_inode), 0, ++ SLAB_MEM_SPREAD | SLAB_RECLAIM_ACCOUNT, ++ &inode_slab_ctor); ++ if (!ubifs_inode_slab) ++ goto out_reg; ++ ++ register_shrinker(&ubifs_shrinker_info); ++ ++ err = ubifs_compressors_init(); ++ if (err) ++ goto out_shrinker; ++ ++ err = dbg_debugfs_init(); ++ if (err) ++ goto out_compr; ++ ++ return 0; ++ ++out_compr: ++ ubifs_compressors_exit(); ++out_shrinker: ++ unregister_shrinker(&ubifs_shrinker_info); ++ kmem_cache_destroy(ubifs_inode_slab); ++out_reg: ++ unregister_filesystem(&ubifs_fs_type); ++ return err; ++} ++/* late_initcall to let compressors initialize first */ ++late_initcall(ubifs_init); ++ ++static void __exit ubifs_exit(void) ++{ ++ ubifs_assert(list_empty(&ubifs_infos)); ++ ubifs_assert(atomic_long_read(&ubifs_clean_zn_cnt) == 0); ++ ++ dbg_debugfs_exit(); ++ ubifs_compressors_exit(); ++ unregister_shrinker(&ubifs_shrinker_info); ++ kmem_cache_destroy(ubifs_inode_slab); ++ unregister_filesystem(&ubifs_fs_type); ++} ++module_exit(ubifs_exit); ++ ++MODULE_LICENSE("GPL"); ++MODULE_VERSION(__stringify(UBIFS_VERSION)); ++MODULE_AUTHOR("Artem Bityutskiy, Adrian Hunter"); ++MODULE_DESCRIPTION("UBIFS - UBI File System"); +diff -Nurd linux-2.6.24/fs/ubifs/tnc.c ubifs-v2.6.24/fs/ubifs/tnc.c +--- linux-2.6.24/fs/ubifs/tnc.c 1970-01-01 02:00:00.000000000 +0200 ++++ ubifs-v2.6.24/fs/ubifs/tnc.c 2009-04-07 
17:14:47.000000000 +0200 +@@ -0,0 +1,3270 @@ ++/* ++ * This file is part of UBIFS. ++ * ++ * Copyright (C) 2006-2008 Nokia Corporation. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published by ++ * the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, write to the Free Software Foundation, Inc., 51 ++ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ * ++ * Authors: Adrian Hunter ++ * Artem Bityutskiy (Битюцкий Артём) ++ */ ++ ++/* ++ * This file implements TNC (Tree Node Cache) which caches indexing nodes of ++ * the UBIFS B-tree. ++ * ++ * At the moment the locking rules of the TNC tree are quite simple and ++ * straightforward. We just have a mutex and lock it when we traverse the ++ * tree. If a znode is not in memory, we read it from flash while still having ++ * the mutex locked. ++ */ ++ ++#include <linux/crc32.h> ++#include "ubifs.h" ++ ++/* ++ * Returned codes of 'matches_name()' and 'fallible_matches_name()' functions. ++ * @NAME_LESS: name corresponding to the first argument is less than second ++ * @NAME_MATCHES: names match ++ * @NAME_GREATER: name corresponding to the second argument is greater than ++ * first ++ * @NOT_ON_MEDIA: node referred by zbranch does not exist on the media ++ * ++ * These constants were introduce to improve readability. ++ */ ++enum { ++ NAME_LESS = 0, ++ NAME_MATCHES = 1, ++ NAME_GREATER = 2, ++ NOT_ON_MEDIA = 3, ++}; ++ ++/** ++ * insert_old_idx - record an index node obsoleted since the last commit start. 
++ * @c: UBIFS file-system description object ++ * @lnum: LEB number of obsoleted index node ++ * @offs: offset of obsoleted index node ++ * ++ * Returns %0 on success, and a negative error code on failure. ++ * ++ * For recovery, there must always be a complete intact version of the index on ++ * flash at all times. That is called the "old index". It is the index as at the ++ * time of the last successful commit. Many of the index nodes in the old index ++ * may be dirty, but they must not be erased until the next successful commit ++ * (at which point that index becomes the old index). ++ * ++ * That means that the garbage collection and the in-the-gaps method of ++ * committing must be able to determine if an index node is in the old index. ++ * Most of the old index nodes can be found by looking up the TNC using the ++ * 'lookup_znode()' function. However, some of the old index nodes may have ++ * been deleted from the current index or may have been changed so much that ++ * they cannot be easily found. In those cases, an entry is added to an RB-tree. ++ * That is what this function does. The RB-tree is ordered by LEB number and ++ * offset because they uniquely identify the old index node. 
++ */ ++static int insert_old_idx(struct ubifs_info *c, int lnum, int offs) ++{ ++ struct ubifs_old_idx *old_idx, *o; ++ struct rb_node **p, *parent = NULL; ++ ++ old_idx = kmalloc(sizeof(struct ubifs_old_idx), GFP_NOFS); ++ if (unlikely(!old_idx)) ++ return -ENOMEM; ++ old_idx->lnum = lnum; ++ old_idx->offs = offs; ++ ++ p = &c->old_idx.rb_node; ++ while (*p) { ++ parent = *p; ++ o = rb_entry(parent, struct ubifs_old_idx, rb); ++ if (lnum < o->lnum) ++ p = &(*p)->rb_left; ++ else if (lnum > o->lnum) ++ p = &(*p)->rb_right; ++ else if (offs < o->offs) ++ p = &(*p)->rb_left; ++ else if (offs > o->offs) ++ p = &(*p)->rb_right; ++ else { ++ ubifs_err("old idx added twice!"); ++ kfree(old_idx); ++ return 0; ++ } ++ } ++ rb_link_node(&old_idx->rb, parent, p); ++ rb_insert_color(&old_idx->rb, &c->old_idx); ++ return 0; ++} ++ ++/** ++ * insert_old_idx_znode - record a znode obsoleted since last commit start. ++ * @c: UBIFS file-system description object ++ * @znode: znode of obsoleted index node ++ * ++ * Returns %0 on success, and a negative error code on failure. ++ */ ++int insert_old_idx_znode(struct ubifs_info *c, struct ubifs_znode *znode) ++{ ++ if (znode->parent) { ++ struct ubifs_zbranch *zbr; ++ ++ zbr = &znode->parent->zbranch[znode->iip]; ++ if (zbr->len) ++ return insert_old_idx(c, zbr->lnum, zbr->offs); ++ } else ++ if (c->zroot.len) ++ return insert_old_idx(c, c->zroot.lnum, ++ c->zroot.offs); ++ return 0; ++} ++ ++/** ++ * ins_clr_old_idx_znode - record a znode obsoleted since last commit start. ++ * @c: UBIFS file-system description object ++ * @znode: znode of obsoleted index node ++ * ++ * Returns %0 on success, and a negative error code on failure. 
++ */ ++static int ins_clr_old_idx_znode(struct ubifs_info *c, ++ struct ubifs_znode *znode) ++{ ++ int err; ++ ++ if (znode->parent) { ++ struct ubifs_zbranch *zbr; ++ ++ zbr = &znode->parent->zbranch[znode->iip]; ++ if (zbr->len) { ++ err = insert_old_idx(c, zbr->lnum, zbr->offs); ++ if (err) ++ return err; ++ zbr->lnum = 0; ++ zbr->offs = 0; ++ zbr->len = 0; ++ } ++ } else ++ if (c->zroot.len) { ++ err = insert_old_idx(c, c->zroot.lnum, c->zroot.offs); ++ if (err) ++ return err; ++ c->zroot.lnum = 0; ++ c->zroot.offs = 0; ++ c->zroot.len = 0; ++ } ++ return 0; ++} ++ ++/** ++ * destroy_old_idx - destroy the old_idx RB-tree. ++ * @c: UBIFS file-system description object ++ * ++ * During start commit, the old_idx RB-tree is used to avoid overwriting index ++ * nodes that were in the index last commit but have since been deleted. This ++ * is necessary for recovery i.e. the old index must be kept intact until the ++ * new index is successfully written. The old-idx RB-tree is used for the ++ * in-the-gaps method of writing index nodes and is destroyed every commit. ++ */ ++void destroy_old_idx(struct ubifs_info *c) ++{ ++ struct rb_node *this = c->old_idx.rb_node; ++ struct ubifs_old_idx *old_idx; ++ ++ while (this) { ++ if (this->rb_left) { ++ this = this->rb_left; ++ continue; ++ } else if (this->rb_right) { ++ this = this->rb_right; ++ continue; ++ } ++ old_idx = rb_entry(this, struct ubifs_old_idx, rb); ++ this = rb_parent(this); ++ if (this) { ++ if (this->rb_left == &old_idx->rb) ++ this->rb_left = NULL; ++ else ++ this->rb_right = NULL; ++ } ++ kfree(old_idx); ++ } ++ c->old_idx = RB_ROOT; ++} ++ ++/** ++ * copy_znode - copy a dirty znode. ++ * @c: UBIFS file-system description object ++ * @znode: znode to copy ++ * ++ * A dirty znode being committed may not be changed, so it is copied. 
++ */ ++static struct ubifs_znode *copy_znode(struct ubifs_info *c, ++ struct ubifs_znode *znode) ++{ ++ struct ubifs_znode *zn; ++ ++ zn = kmalloc(c->max_znode_sz, GFP_NOFS); ++ if (unlikely(!zn)) ++ return ERR_PTR(-ENOMEM); ++ ++ memcpy(zn, znode, c->max_znode_sz); ++ zn->cnext = NULL; ++ __set_bit(DIRTY_ZNODE, &zn->flags); ++ __clear_bit(COW_ZNODE, &zn->flags); ++ ++ ubifs_assert(!test_bit(OBSOLETE_ZNODE, &znode->flags)); ++ __set_bit(OBSOLETE_ZNODE, &znode->flags); ++ ++ if (znode->level != 0) { ++ int i; ++ const int n = zn->child_cnt; ++ ++ /* The children now have new parent */ ++ for (i = 0; i < n; i++) { ++ struct ubifs_zbranch *zbr = &zn->zbranch[i]; ++ ++ if (zbr->znode) ++ zbr->znode->parent = zn; ++ } ++ } ++ ++ atomic_long_inc(&c->dirty_zn_cnt); ++ return zn; ++} ++ ++/** ++ * add_idx_dirt - add dirt due to a dirty znode. ++ * @c: UBIFS file-system description object ++ * @lnum: LEB number of index node ++ * @dirt: size of index node ++ * ++ * This function updates lprops dirty space and the new size of the index. ++ */ ++static int add_idx_dirt(struct ubifs_info *c, int lnum, int dirt) ++{ ++ c->calc_idx_sz -= ALIGN(dirt, 8); ++ return ubifs_add_dirt(c, lnum, dirt); ++} ++ ++/** ++ * dirty_cow_znode - ensure a znode is not being committed. ++ * @c: UBIFS file-system description object ++ * @zbr: branch of znode to check ++ * ++ * Returns dirtied znode on success or negative error code on failure. 
++ */ ++static struct ubifs_znode *dirty_cow_znode(struct ubifs_info *c, ++ struct ubifs_zbranch *zbr) ++{ ++ struct ubifs_znode *znode = zbr->znode; ++ struct ubifs_znode *zn; ++ int err; ++ ++ if (!test_bit(COW_ZNODE, &znode->flags)) { ++ /* znode is not being committed */ ++ if (!test_and_set_bit(DIRTY_ZNODE, &znode->flags)) { ++ atomic_long_inc(&c->dirty_zn_cnt); ++ atomic_long_dec(&c->clean_zn_cnt); ++ atomic_long_dec(&ubifs_clean_zn_cnt); ++ err = add_idx_dirt(c, zbr->lnum, zbr->len); ++ if (unlikely(err)) ++ return ERR_PTR(err); ++ } ++ return znode; ++ } ++ ++ zn = copy_znode(c, znode); ++ if (IS_ERR(zn)) ++ return zn; ++ ++ if (zbr->len) { ++ err = insert_old_idx(c, zbr->lnum, zbr->offs); ++ if (unlikely(err)) ++ return ERR_PTR(err); ++ err = add_idx_dirt(c, zbr->lnum, zbr->len); ++ } else ++ err = 0; ++ ++ zbr->znode = zn; ++ zbr->lnum = 0; ++ zbr->offs = 0; ++ zbr->len = 0; ++ ++ if (unlikely(err)) ++ return ERR_PTR(err); ++ return zn; ++} ++ ++/** ++ * lnc_add - add a leaf node to the leaf node cache. ++ * @c: UBIFS file-system description object ++ * @zbr: zbranch of leaf node ++ * @node: leaf node ++ * ++ * Leaf nodes are non-index nodes directory entry nodes or data nodes. The ++ * purpose of the leaf node cache is to save re-reading the same leaf node over ++ * and over again. Most things are cached by VFS, however the file system must ++ * cache directory entries for readdir and for resolving hash collisions. The ++ * present implementation of the leaf node cache is extremely simple, and ++ * allows for error returns that are not used but that may be needed if a more ++ * complex implementation is created. ++ * ++ * Note, this function does not add the @node object to LNC directly, but ++ * allocates a copy of the object and adds the copy to LNC. The reason for this ++ * is that @node has been allocated outside of the TNC subsystem and will be ++ * used with @c->tnc_mutex unlock upon return from the TNC subsystem. 
But LNC ++ * may be changed at any time, e.g. freed by the shrinker. ++ */ ++static int lnc_add(struct ubifs_info *c, struct ubifs_zbranch *zbr, ++ const void *node) ++{ ++ int err; ++ void *lnc_node; ++ const struct ubifs_dent_node *dent = node; ++ ++ ubifs_assert(!zbr->leaf); ++ ubifs_assert(zbr->len != 0); ++ ubifs_assert(is_hash_key(c, &zbr->key)); ++ ++ err = ubifs_validate_entry(c, dent); ++ if (err) { ++ dbg_dump_stack(); ++ dbg_dump_node(c, dent); ++ return err; ++ } ++ ++ lnc_node = kmalloc(zbr->len, GFP_NOFS); ++ if (!lnc_node) ++ /* We don't have to have the cache, so no error */ ++ return 0; ++ ++ memcpy(lnc_node, node, zbr->len); ++ zbr->leaf = lnc_node; ++ return 0; ++} ++ ++ /** ++ * lnc_add_directly - add a leaf node to the leaf-node-cache. ++ * @c: UBIFS file-system description object ++ * @zbr: zbranch of leaf node ++ * @node: leaf node ++ * ++ * This function is similar to 'lnc_add()', but it does not create a copy of ++ * @node but inserts @node to TNC directly. ++ */ ++static int lnc_add_directly(struct ubifs_info *c, struct ubifs_zbranch *zbr, ++ void *node) ++{ ++ int err; ++ ++ ubifs_assert(!zbr->leaf); ++ ubifs_assert(zbr->len != 0); ++ ++ err = ubifs_validate_entry(c, node); ++ if (err) { ++ dbg_dump_stack(); ++ dbg_dump_node(c, node); ++ return err; ++ } ++ ++ zbr->leaf = node; ++ return 0; ++} ++ ++/** ++ * lnc_free - remove a leaf node from the leaf node cache. ++ * @zbr: zbranch of leaf node ++ * @node: leaf node ++ */ ++static void lnc_free(struct ubifs_zbranch *zbr) ++{ ++ if (!zbr->leaf) ++ return; ++ kfree(zbr->leaf); ++ zbr->leaf = NULL; ++} ++ ++/** ++ * tnc_read_node_nm - read a "hashed" leaf node. ++ * @c: UBIFS file-system description object ++ * @zbr: key and position of the node ++ * @node: node is returned here ++ * ++ * This function reads a "hashed" node defined by @zbr from the leaf node cache ++ * (in it is there) or from the hash media, in which case the node is also ++ * added to LNC. 
Returns zero in case of success or a negative negative error ++ * code in case of failure. ++ */ ++static int tnc_read_node_nm(struct ubifs_info *c, struct ubifs_zbranch *zbr, ++ void *node) ++{ ++ int err; ++ ++ ubifs_assert(is_hash_key(c, &zbr->key)); ++ ++ if (zbr->leaf) { ++ /* Read from the leaf node cache */ ++ ubifs_assert(zbr->len != 0); ++ memcpy(node, zbr->leaf, zbr->len); ++ return 0; ++ } ++ ++ err = ubifs_tnc_read_node(c, zbr, node); ++ if (err) ++ return err; ++ ++ /* Add the node to the leaf node cache */ ++ err = lnc_add(c, zbr, node); ++ return err; ++} ++ ++/** ++ * try_read_node - read a node if it is a node. ++ * @c: UBIFS file-system description object ++ * @buf: buffer to read to ++ * @type: node type ++ * @len: node length (not aligned) ++ * @lnum: LEB number of node to read ++ * @offs: offset of node to read ++ * ++ * This function tries to read a node of known type and length, checks it and ++ * stores it in @buf. This function returns %1 if a node is present and %0 if ++ * a node is not present. A negative error code is returned for I/O errors. ++ * This function performs that same function as ubifs_read_node except that ++ * it does not require that there is actually a node present and instead ++ * the return code indicates if a node was read. ++ * ++ * Note, this function does not check CRC of data nodes if @c->no_chk_data_crc ++ * is true (it is controlled by corresponding mount option). However, if ++ * @c->always_chk_crc is true, @c->no_chk_data_crc is ignored and CRC is always ++ * checked. 
++ */ ++static int try_read_node(const struct ubifs_info *c, void *buf, int type, ++ int len, int lnum, int offs) ++{ ++ int err, node_len; ++ struct ubifs_ch *ch = buf; ++ uint32_t crc, node_crc; ++ ++ dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len); ++ ++ err = ubi_read(c->ubi, lnum, buf, offs, len); ++ if (err) { ++ ubifs_err("cannot read node type %d from LEB %d:%d, error %d", ++ type, lnum, offs, err); ++ return err; ++ } ++ ++ if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC) ++ return 0; ++ ++ if (ch->node_type != type) ++ return 0; ++ ++ node_len = le32_to_cpu(ch->len); ++ if (node_len != len) ++ return 0; ++ ++ if (type == UBIFS_DATA_NODE && !c->always_chk_crc && c->no_chk_data_crc) ++ return 1; ++ ++ crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); ++ node_crc = le32_to_cpu(ch->crc); ++ if (crc != node_crc) ++ return 0; ++ ++ return 1; ++} ++ ++/** ++ * fallible_read_node - try to read a leaf node. ++ * @c: UBIFS file-system description object ++ * @key: key of node to read ++ * @zbr: position of node ++ * @node: node returned ++ * ++ * This function tries to read a node and returns %1 if the node is read, %0 ++ * if the node is not present, and a negative error code in the case of error. 
++ */ ++static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key, ++ struct ubifs_zbranch *zbr, void *node) ++{ ++ int ret; ++ ++ dbg_tnc("LEB %d:%d, key %s", zbr->lnum, zbr->offs, DBGKEY(key)); ++ ++ ret = try_read_node(c, node, key_type(c, key), zbr->len, zbr->lnum, ++ zbr->offs); ++ if (ret == 1) { ++ union ubifs_key node_key; ++ struct ubifs_dent_node *dent = node; ++ ++ /* All nodes have key in the same place */ ++ key_read(c, &dent->key, &node_key); ++ if (keys_cmp(c, key, &node_key) != 0) ++ ret = 0; ++ } ++ if (ret == 0 && c->replaying) ++ dbg_mnt("dangling branch LEB %d:%d len %d, key %s", ++ zbr->lnum, zbr->offs, zbr->len, DBGKEY(key)); ++ return ret; ++} ++ ++/** ++ * matches_name - determine if a direntry or xattr entry matches a given name. ++ * @c: UBIFS file-system description object ++ * @zbr: zbranch of dent ++ * @nm: name to match ++ * ++ * This function checks if xentry/direntry referred by zbranch @zbr matches name ++ * @nm. Returns %NAME_MATCHES if it does, %NAME_LESS if the name referred by ++ * @zbr is less than @nm, and %NAME_GREATER if it is greater than @nm. In case ++ * of failure, a negative error code is returned. 
++ */ ++static int matches_name(struct ubifs_info *c, struct ubifs_zbranch *zbr, ++ const struct qstr *nm) ++{ ++ struct ubifs_dent_node *dent; ++ int nlen, err; ++ ++ /* If possible, match against the dent in the leaf node cache */ ++ if (!zbr->leaf) { ++ dent = kmalloc(zbr->len, GFP_NOFS); ++ if (!dent) ++ return -ENOMEM; ++ ++ err = ubifs_tnc_read_node(c, zbr, dent); ++ if (err) ++ goto out_free; ++ ++ /* Add the node to the leaf node cache */ ++ err = lnc_add_directly(c, zbr, dent); ++ if (err) ++ goto out_free; ++ } else ++ dent = zbr->leaf; ++ ++ nlen = le16_to_cpu(dent->nlen); ++ err = memcmp(dent->name, nm->name, min_t(int, nlen, nm->len)); ++ if (err == 0) { ++ if (nlen == nm->len) ++ return NAME_MATCHES; ++ else if (nlen < nm->len) ++ return NAME_LESS; ++ else ++ return NAME_GREATER; ++ } else if (err < 0) ++ return NAME_LESS; ++ else ++ return NAME_GREATER; ++ ++out_free: ++ kfree(dent); ++ return err; ++} ++ ++/** ++ * get_znode - get a TNC znode that may not be loaded yet. ++ * @c: UBIFS file-system description object ++ * @znode: parent znode ++ * @n: znode branch slot number ++ * ++ * This function returns the znode or a negative error code. ++ */ ++static struct ubifs_znode *get_znode(struct ubifs_info *c, ++ struct ubifs_znode *znode, int n) ++{ ++ struct ubifs_zbranch *zbr; ++ ++ zbr = &znode->zbranch[n]; ++ if (zbr->znode) ++ znode = zbr->znode; ++ else ++ znode = ubifs_load_znode(c, zbr, znode, n); ++ return znode; ++} ++ ++/** ++ * tnc_next - find next TNC entry. ++ * @c: UBIFS file-system description object ++ * @zn: znode is passed and returned here ++ * @n: znode branch slot number is passed and returned here ++ * ++ * This function returns %0 if the next TNC entry is found, %-ENOENT if there is ++ * no next entry, or a negative error code otherwise. 
++ */ ++static int tnc_next(struct ubifs_info *c, struct ubifs_znode **zn, int *n) ++{ ++ struct ubifs_znode *znode = *zn; ++ int nn = *n; ++ ++ nn += 1; ++ if (nn < znode->child_cnt) { ++ *n = nn; ++ return 0; ++ } ++ while (1) { ++ struct ubifs_znode *zp; ++ ++ zp = znode->parent; ++ if (!zp) ++ return -ENOENT; ++ nn = znode->iip + 1; ++ znode = zp; ++ if (nn < znode->child_cnt) { ++ znode = get_znode(c, znode, nn); ++ if (IS_ERR(znode)) ++ return PTR_ERR(znode); ++ while (znode->level != 0) { ++ znode = get_znode(c, znode, 0); ++ if (IS_ERR(znode)) ++ return PTR_ERR(znode); ++ } ++ nn = 0; ++ break; ++ } ++ } ++ *zn = znode; ++ *n = nn; ++ return 0; ++} ++ ++/** ++ * tnc_prev - find previous TNC entry. ++ * @c: UBIFS file-system description object ++ * @zn: znode is returned here ++ * @n: znode branch slot number is passed and returned here ++ * ++ * This function returns %0 if the previous TNC entry is found, %-ENOENT if ++ * there is no next entry, or a negative error code otherwise. ++ */ ++static int tnc_prev(struct ubifs_info *c, struct ubifs_znode **zn, int *n) ++{ ++ struct ubifs_znode *znode = *zn; ++ int nn = *n; ++ ++ if (nn > 0) { ++ *n = nn - 1; ++ return 0; ++ } ++ while (1) { ++ struct ubifs_znode *zp; ++ ++ zp = znode->parent; ++ if (!zp) ++ return -ENOENT; ++ nn = znode->iip - 1; ++ znode = zp; ++ if (nn >= 0) { ++ znode = get_znode(c, znode, nn); ++ if (IS_ERR(znode)) ++ return PTR_ERR(znode); ++ while (znode->level != 0) { ++ nn = znode->child_cnt - 1; ++ znode = get_znode(c, znode, nn); ++ if (IS_ERR(znode)) ++ return PTR_ERR(znode); ++ } ++ nn = znode->child_cnt - 1; ++ break; ++ } ++ } ++ *zn = znode; ++ *n = nn; ++ return 0; ++} ++ ++/** ++ * resolve_collision - resolve a collision. 
++ * @c: UBIFS file-system description object ++ * @key: key of a directory or extended attribute entry ++ * @zn: znode is returned here ++ * @n: zbranch number is passed and returned here ++ * @nm: name of the entry ++ * ++ * This function is called for "hashed" keys to make sure that the found key ++ * really corresponds to the looked up node (directory or extended attribute ++ * entry). It returns %1 and sets @zn and @n if the collision is resolved. ++ * %0 is returned if @nm is not found and @zn and @n are set to the previous ++ * entry, i.e. to the entry after which @nm could follow if it were in TNC. ++ * This means that @n may be set to %-1 if the leftmost key in @zn is the ++ * previous one. A negative error code is returned on failures. ++ */ ++static int resolve_collision(struct ubifs_info *c, const union ubifs_key *key, ++ struct ubifs_znode **zn, int *n, ++ const struct qstr *nm) ++{ ++ int err; ++ ++ err = matches_name(c, &(*zn)->zbranch[*n], nm); ++ if (unlikely(err < 0)) ++ return err; ++ if (err == NAME_MATCHES) ++ return 1; ++ ++ if (err == NAME_GREATER) { ++ /* Look left */ ++ while (1) { ++ err = tnc_prev(c, zn, n); ++ if (err == -ENOENT) { ++ ubifs_assert(*n == 0); ++ *n = -1; ++ return 0; ++ } ++ if (err < 0) ++ return err; ++ if (keys_cmp(c, &(*zn)->zbranch[*n].key, key)) { ++ /* ++ * We have found the branch after which we would ++ * like to insert, but inserting in this znode ++ * may still be wrong. Consider the following 3 ++ * znodes, in the case where we are resolving a ++ * collision with Key2. ++ * ++ * znode zp ++ * ---------------------- ++ * level 1 | Key0 | Key1 | ++ * ----------------------- ++ * | | ++ * znode za | | znode zb ++ * ------------ ------------ ++ * level 0 | Key0 | | Key2 | ++ * ------------ ------------ ++ * ++ * The lookup finds Key2 in znode zb. Lets say ++ * there is no match and the name is greater so ++ * we look left. When we find Key0, we end up ++ * here. 
If we return now, we will insert into ++ * znode za at slot n = 1. But that is invalid ++ * according to the parent's keys. Key2 must ++ * be inserted into znode zb. ++ * ++ * Note, this problem is not relevant for the ++ * case when we go right, because ++ * 'tnc_insert()' would correct the parent key. ++ */ ++ if (*n == (*zn)->child_cnt - 1) { ++ err = tnc_next(c, zn, n); ++ if (err) { ++ /* Should be impossible */ ++ ubifs_assert(0); ++ if (err == -ENOENT) ++ err = -EINVAL; ++ return err; ++ } ++ ubifs_assert(*n == 0); ++ *n = -1; ++ } ++ return 0; ++ } ++ err = matches_name(c, &(*zn)->zbranch[*n], nm); ++ if (err < 0) ++ return err; ++ if (err == NAME_LESS) ++ return 0; ++ if (err == NAME_MATCHES) ++ return 1; ++ ubifs_assert(err == NAME_GREATER); ++ } ++ } else { ++ int nn = *n; ++ struct ubifs_znode *znode = *zn; ++ ++ /* Look right */ ++ while (1) { ++ err = tnc_next(c, &znode, &nn); ++ if (err == -ENOENT) ++ return 0; ++ if (err < 0) ++ return err; ++ if (keys_cmp(c, &znode->zbranch[nn].key, key)) ++ return 0; ++ err = matches_name(c, &znode->zbranch[nn], nm); ++ if (err < 0) ++ return err; ++ if (err == NAME_GREATER) ++ return 0; ++ *zn = znode; ++ *n = nn; ++ if (err == NAME_MATCHES) ++ return 1; ++ ubifs_assert(err == NAME_LESS); ++ } ++ } ++} ++ ++/** ++ * fallible_matches_name - determine if a dent matches a given name. ++ * @c: UBIFS file-system description object ++ * @zbr: zbranch of dent ++ * @nm: name to match ++ * ++ * This is a "fallible" version of 'matches_name()' function which does not ++ * panic if the direntry/xentry referred by @zbr does not exist on the media. ++ * ++ * This function checks if xentry/direntry referred by zbranch @zbr matches name ++ * @nm. Returns %NAME_MATCHES it does, %NAME_LESS if the name referred by @zbr ++ * is less than @nm, %NAME_GREATER if it is greater than @nm, and @NOT_ON_MEDIA ++ * if xentry/direntry referred by @zbr does not exist on the media. A negative ++ * error code is returned in case of failure. 
++ */ ++static int fallible_matches_name(struct ubifs_info *c, ++ struct ubifs_zbranch *zbr, ++ const struct qstr *nm) ++{ ++ struct ubifs_dent_node *dent; ++ int nlen, err; ++ ++ /* If possible, match against the dent in the leaf node cache */ ++ if (!zbr->leaf) { ++ dent = kmalloc(zbr->len, GFP_NOFS); ++ if (!dent) ++ return -ENOMEM; ++ ++ err = fallible_read_node(c, &zbr->key, zbr, dent); ++ if (err < 0) ++ goto out_free; ++ if (err == 0) { ++ /* The node was not present */ ++ err = NOT_ON_MEDIA; ++ goto out_free; ++ } ++ ubifs_assert(err == 1); ++ ++ err = lnc_add_directly(c, zbr, dent); ++ if (err) ++ goto out_free; ++ } else ++ dent = zbr->leaf; ++ ++ nlen = le16_to_cpu(dent->nlen); ++ err = memcmp(dent->name, nm->name, min_t(int, nlen, nm->len)); ++ if (err == 0) { ++ if (nlen == nm->len) ++ return NAME_MATCHES; ++ else if (nlen < nm->len) ++ return NAME_LESS; ++ else ++ return NAME_GREATER; ++ } else if (err < 0) ++ return NAME_LESS; ++ else ++ return NAME_GREATER; ++ ++out_free: ++ kfree(dent); ++ return err; ++} ++ ++/** ++ * fallible_resolve_collision - resolve a collision even if nodes are missing. ++ * @c: UBIFS file-system description object ++ * @key: key ++ * @zn: znode is returned here ++ * @n: branch number is passed and returned here ++ * @nm: name of directory entry ++ * @adding: indicates caller is adding a key to the TNC ++ * ++ * This is a "fallible" version of the 'resolve_collision()' function which ++ * does not panic if one of the nodes referred to by TNC does not exist on the ++ * media. This may happen when replaying the journal if a deleted node was ++ * Garbage-collected and the commit was not done. A branch that refers to a node ++ * that is not present is called a dangling branch. 
The following are the return ++ * codes for this function: ++ * o if @nm was found, %1 is returned and @zn and @n are set to the found ++ * branch; ++ * o if we are @adding and @nm was not found, %0 is returned; ++ * o if we are not @adding and @nm was not found, but a dangling branch was ++ * found, then %1 is returned and @zn and @n are set to the dangling branch; ++ * o a negative error code is returned in case of failure. ++ */ ++static int fallible_resolve_collision(struct ubifs_info *c, ++ const union ubifs_key *key, ++ struct ubifs_znode **zn, int *n, ++ const struct qstr *nm, int adding) ++{ ++ struct ubifs_znode *o_znode = NULL, *znode = *zn; ++ int uninitialized_var(o_n), err, cmp, unsure = 0, nn = *n; ++ ++ cmp = fallible_matches_name(c, &znode->zbranch[nn], nm); ++ if (unlikely(cmp < 0)) ++ return cmp; ++ if (cmp == NAME_MATCHES) ++ return 1; ++ if (cmp == NOT_ON_MEDIA) { ++ o_znode = znode; ++ o_n = nn; ++ /* ++ * We are unlucky and hit a dangling branch straight away. ++ * Now we do not really know where to go to find the needed ++ * branch - to the left or to the right. Well, let's try left. 
++ */ ++ unsure = 1; ++ } else if (!adding) ++ unsure = 1; /* Remove a dangling branch wherever it is */ ++ ++ if (cmp == NAME_GREATER || unsure) { ++ /* Look left */ ++ while (1) { ++ err = tnc_prev(c, zn, n); ++ if (err == -ENOENT) { ++ ubifs_assert(*n == 0); ++ *n = -1; ++ break; ++ } ++ if (err < 0) ++ return err; ++ if (keys_cmp(c, &(*zn)->zbranch[*n].key, key)) { ++ /* See comments in 'resolve_collision()' */ ++ if (*n == (*zn)->child_cnt - 1) { ++ err = tnc_next(c, zn, n); ++ if (err) { ++ /* Should be impossible */ ++ ubifs_assert(0); ++ if (err == -ENOENT) ++ err = -EINVAL; ++ return err; ++ } ++ ubifs_assert(*n == 0); ++ *n = -1; ++ } ++ break; ++ } ++ err = fallible_matches_name(c, &(*zn)->zbranch[*n], nm); ++ if (err < 0) ++ return err; ++ if (err == NAME_MATCHES) ++ return 1; ++ if (err == NOT_ON_MEDIA) { ++ o_znode = *zn; ++ o_n = *n; ++ continue; ++ } ++ if (!adding) ++ continue; ++ if (err == NAME_LESS) ++ break; ++ else ++ unsure = 0; ++ } ++ } ++ ++ if (cmp == NAME_LESS || unsure) { ++ /* Look right */ ++ *zn = znode; ++ *n = nn; ++ while (1) { ++ err = tnc_next(c, &znode, &nn); ++ if (err == -ENOENT) ++ break; ++ if (err < 0) ++ return err; ++ if (keys_cmp(c, &znode->zbranch[nn].key, key)) ++ break; ++ err = fallible_matches_name(c, &znode->zbranch[nn], nm); ++ if (err < 0) ++ return err; ++ if (err == NAME_GREATER) ++ break; ++ *zn = znode; ++ *n = nn; ++ if (err == NAME_MATCHES) ++ return 1; ++ if (err == NOT_ON_MEDIA) { ++ o_znode = znode; ++ o_n = nn; ++ } ++ } ++ } ++ ++ /* Never match a dangling branch when adding */ ++ if (adding || !o_znode) ++ return 0; ++ ++ dbg_mnt("dangling match LEB %d:%d len %d %s", ++ o_znode->zbranch[o_n].lnum, o_znode->zbranch[o_n].offs, ++ o_znode->zbranch[o_n].len, DBGKEY(key)); ++ *zn = o_znode; ++ *n = o_n; ++ return 1; ++} ++ ++/** ++ * matches_position - determine if a zbranch matches a given position. 
++ * @zbr: zbranch of dent ++ * @lnum: LEB number of dent to match ++ * @offs: offset of dent to match ++ * ++ * This function returns %1 if @lnum:@offs matches, and %0 otherwise. ++ */ ++static int matches_position(struct ubifs_zbranch *zbr, int lnum, int offs) ++{ ++ if (zbr->lnum == lnum && zbr->offs == offs) ++ return 1; ++ else ++ return 0; ++} ++ ++/** ++ * resolve_collision_directly - resolve a collision directly. ++ * @c: UBIFS file-system description object ++ * @key: key of directory entry ++ * @zn: znode is passed and returned here ++ * @n: zbranch number is passed and returned here ++ * @lnum: LEB number of dent node to match ++ * @offs: offset of dent node to match ++ * ++ * This function is used for "hashed" keys to make sure the found directory or ++ * extended attribute entry node is what was looked for. It is used when the ++ * flash address of the right node is known (@lnum:@offs) which makes it much ++ * easier to resolve collisions (no need to read entries and match full ++ * names). This function returns %1 and sets @zn and @n if the collision is ++ * resolved, %0 if @lnum:@offs is not found and @zn and @n are set to the ++ * previous directory entry. Otherwise a negative error code is returned. 
++ */ ++static int resolve_collision_directly(struct ubifs_info *c, ++ const union ubifs_key *key, ++ struct ubifs_znode **zn, int *n, ++ int lnum, int offs) ++{ ++ struct ubifs_znode *znode; ++ int nn, err; ++ ++ znode = *zn; ++ nn = *n; ++ if (matches_position(&znode->zbranch[nn], lnum, offs)) ++ return 1; ++ ++ /* Look left */ ++ while (1) { ++ err = tnc_prev(c, &znode, &nn); ++ if (err == -ENOENT) ++ break; ++ if (err < 0) ++ return err; ++ if (keys_cmp(c, &znode->zbranch[nn].key, key)) ++ break; ++ if (matches_position(&znode->zbranch[nn], lnum, offs)) { ++ *zn = znode; ++ *n = nn; ++ return 1; ++ } ++ } ++ ++ /* Look right */ ++ znode = *zn; ++ nn = *n; ++ while (1) { ++ err = tnc_next(c, &znode, &nn); ++ if (err == -ENOENT) ++ return 0; ++ if (err < 0) ++ return err; ++ if (keys_cmp(c, &znode->zbranch[nn].key, key)) ++ return 0; ++ *zn = znode; ++ *n = nn; ++ if (matches_position(&znode->zbranch[nn], lnum, offs)) ++ return 1; ++ } ++} ++ ++/** ++ * dirty_cow_bottom_up - dirty a znode and its ancestors. ++ * @c: UBIFS file-system description object ++ * @znode: znode to dirty ++ * ++ * If we do not have a unique key that resides in a znode, then we cannot ++ * dirty that znode from the top down (i.e. by using lookup_level0_dirty) ++ * This function records the path back to the last dirty ancestor, and then ++ * dirties the znodes on that path. 
++ */ ++static struct ubifs_znode *dirty_cow_bottom_up(struct ubifs_info *c, ++ struct ubifs_znode *znode) ++{ ++ struct ubifs_znode *zp; ++ int *path = c->bottom_up_buf, p = 0; ++ ++ ubifs_assert(c->zroot.znode); ++ ubifs_assert(znode); ++ if (c->zroot.znode->level > BOTTOM_UP_HEIGHT) { ++ kfree(c->bottom_up_buf); ++ c->bottom_up_buf = kmalloc(c->zroot.znode->level * sizeof(int), ++ GFP_NOFS); ++ if (!c->bottom_up_buf) ++ return ERR_PTR(-ENOMEM); ++ path = c->bottom_up_buf; ++ } ++ if (c->zroot.znode->level) { ++ /* Go up until parent is dirty */ ++ while (1) { ++ int n; ++ ++ zp = znode->parent; ++ if (!zp) ++ break; ++ n = znode->iip; ++ ubifs_assert(p < c->zroot.znode->level); ++ path[p++] = n; ++ if (!zp->cnext && ubifs_zn_dirty(znode)) ++ break; ++ znode = zp; ++ } ++ } ++ ++ /* Come back down, dirtying as we go */ ++ while (1) { ++ struct ubifs_zbranch *zbr; ++ ++ zp = znode->parent; ++ if (zp) { ++ ubifs_assert(path[p - 1] >= 0); ++ ubifs_assert(path[p - 1] < zp->child_cnt); ++ zbr = &zp->zbranch[path[--p]]; ++ znode = dirty_cow_znode(c, zbr); ++ } else { ++ ubifs_assert(znode == c->zroot.znode); ++ znode = dirty_cow_znode(c, &c->zroot); ++ } ++ if (IS_ERR(znode) || !p) ++ break; ++ ubifs_assert(path[p - 1] >= 0); ++ ubifs_assert(path[p - 1] < znode->child_cnt); ++ znode = znode->zbranch[path[p - 1]].znode; ++ } ++ ++ return znode; ++} ++ ++/** ++ * ubifs_lookup_level0 - search for zero-level znode. ++ * @c: UBIFS file-system description object ++ * @key: key to lookup ++ * @zn: znode is returned here ++ * @n: znode branch slot number is returned here ++ * ++ * This function looks up the TNC tree and search for zero-level znode which ++ * refers key @key. The found zero-level znode is returned in @zn. There are 3 ++ * cases: ++ * o exact match, i.e. 
the found zero-level znode contains key @key, then %1 ++ * is returned and slot number of the matched branch is stored in @n; ++ * o not exact match, which means that zero-level znode does not contain ++ * @key, then %0 is returned and slot number of the closed branch is stored ++ * in @n; ++ * o @key is so small that it is even less than the lowest key of the ++ * leftmost zero-level node, then %0 is returned and %0 is stored in @n. ++ * ++ * Note, when the TNC tree is traversed, some znodes may be absent, then this ++ * function reads corresponding indexing nodes and inserts them to TNC. In ++ * case of failure, a negative error code is returned. ++ */ ++int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, ++ struct ubifs_znode **zn, int *n) ++{ ++ int err, exact; ++ struct ubifs_znode *znode; ++ unsigned long time = get_seconds(); ++ ++ dbg_tnc("search key %s", DBGKEY(key)); ++ ++ znode = c->zroot.znode; ++ if (unlikely(!znode)) { ++ znode = ubifs_load_znode(c, &c->zroot, NULL, 0); ++ if (IS_ERR(znode)) ++ return PTR_ERR(znode); ++ } ++ ++ znode->time = time; ++ ++ while (1) { ++ struct ubifs_zbranch *zbr; ++ ++ exact = ubifs_search_zbranch(c, znode, key, n); ++ ++ if (znode->level == 0) ++ break; ++ ++ if (*n < 0) ++ *n = 0; ++ zbr = &znode->zbranch[*n]; ++ ++ if (zbr->znode) { ++ znode->time = time; ++ znode = zbr->znode; ++ continue; ++ } ++ ++ /* znode is not in TNC cache, load it from the media */ ++ znode = ubifs_load_znode(c, zbr, znode, *n); ++ if (IS_ERR(znode)) ++ return PTR_ERR(znode); ++ } ++ ++ *zn = znode; ++ if (exact || !is_hash_key(c, key) || *n != -1) { ++ dbg_tnc("found %d, lvl %d, n %d", exact, znode->level, *n); ++ return exact; ++ } ++ ++ /* ++ * Here is a tricky place. We have not found the key and this is a ++ * "hashed" key, which may collide. 
The rest of the code deals with ++ * situations like this: ++ * ++ * | 3 | 5 | ++ * / \ ++ * | 3 | 5 | | 6 | 7 | (x) ++ * ++ * Or more a complex example: ++ * ++ * | 1 | 5 | ++ * / \ ++ * | 1 | 3 | | 5 | 8 | ++ * \ / ++ * | 5 | 5 | | 6 | 7 | (x) ++ * ++ * In the examples, if we are looking for key "5", we may reach nodes ++ * marked with "(x)". In this case what we have do is to look at the ++ * left and see if there is "5" key there. If there is, we have to ++ * return it. ++ * ++ * Note, this whole situation is possible because we allow to have ++ * elements which are equivalent to the next key in the parent in the ++ * children of current znode. For example, this happens if we split a ++ * znode like this: | 3 | 5 | 5 | 6 | 7 |, which results in something ++ * like this: ++ * | 3 | 5 | ++ * / \ ++ * | 3 | 5 | | 5 | 6 | 7 | ++ * ^ ++ * And this becomes what is at the first "picture" after key "5" marked ++ * with "^" is removed. What could be done is we could prohibit ++ * splitting in the middle of the colliding sequence. Also, when ++ * removing the leftmost key, we would have to correct the key of the ++ * parent node, which would introduce additional complications. Namely, ++ * if we changed the leftmost key of the parent znode, the garbage ++ * collector would be unable to find it (GC is doing this when GC'ing ++ * indexing LEBs). Although we already have an additional RB-tree where ++ * we save such changed znodes (see 'ins_clr_old_idx_znode()') until ++ * after the commit. But anyway, this does not look easy to implement ++ * so we did not try this. 
++ */ ++ err = tnc_prev(c, &znode, n); ++ if (err == -ENOENT) { ++ dbg_tnc("found 0, lvl %d, n -1", znode->level); ++ *n = -1; ++ return 0; ++ } ++ if (unlikely(err < 0)) ++ return err; ++ if (keys_cmp(c, key, &znode->zbranch[*n].key)) { ++ dbg_tnc("found 0, lvl %d, n -1", znode->level); ++ *n = -1; ++ return 0; ++ } ++ ++ dbg_tnc("found 1, lvl %d, n %d", znode->level, *n); ++ *zn = znode; ++ return 1; ++} ++ ++/** ++ * lookup_level0_dirty - search for zero-level znode dirtying. ++ * @c: UBIFS file-system description object ++ * @key: key to lookup ++ * @zn: znode is returned here ++ * @n: znode branch slot number is returned here ++ * ++ * This function looks up the TNC tree and search for zero-level znode which ++ * refers key @key. The found zero-level znode is returned in @zn. There are 3 ++ * cases: ++ * o exact match, i.e. the found zero-level znode contains key @key, then %1 ++ * is returned and slot number of the matched branch is stored in @n; ++ * o not exact match, which means that zero-level znode does not contain @key ++ * then %0 is returned and slot number of the closed branch is stored in ++ * @n; ++ * o @key is so small that it is even less than the lowest key of the ++ * leftmost zero-level node, then %0 is returned and %-1 is stored in @n. ++ * ++ * Additionally all znodes in the path from the root to the located zero-level ++ * znode are marked as dirty. ++ * ++ * Note, when the TNC tree is traversed, some znodes may be absent, then this ++ * function reads corresponding indexing nodes and inserts them to TNC. In ++ * case of failure, a negative error code is returned. 
++ */ ++static int lookup_level0_dirty(struct ubifs_info *c, const union ubifs_key *key, ++ struct ubifs_znode **zn, int *n) ++{ ++ int err, exact; ++ struct ubifs_znode *znode; ++ unsigned long time = get_seconds(); ++ ++ dbg_tnc("search and dirty key %s", DBGKEY(key)); ++ ++ znode = c->zroot.znode; ++ if (unlikely(!znode)) { ++ znode = ubifs_load_znode(c, &c->zroot, NULL, 0); ++ if (IS_ERR(znode)) ++ return PTR_ERR(znode); ++ } ++ ++ znode = dirty_cow_znode(c, &c->zroot); ++ if (IS_ERR(znode)) ++ return PTR_ERR(znode); ++ ++ znode->time = time; ++ ++ while (1) { ++ struct ubifs_zbranch *zbr; ++ ++ exact = ubifs_search_zbranch(c, znode, key, n); ++ ++ if (znode->level == 0) ++ break; ++ ++ if (*n < 0) ++ *n = 0; ++ zbr = &znode->zbranch[*n]; ++ ++ if (zbr->znode) { ++ znode->time = time; ++ znode = dirty_cow_znode(c, zbr); ++ if (IS_ERR(znode)) ++ return PTR_ERR(znode); ++ continue; ++ } ++ ++ /* znode is not in TNC cache, load it from the media */ ++ znode = ubifs_load_znode(c, zbr, znode, *n); ++ if (IS_ERR(znode)) ++ return PTR_ERR(znode); ++ znode = dirty_cow_znode(c, zbr); ++ if (IS_ERR(znode)) ++ return PTR_ERR(znode); ++ } ++ ++ *zn = znode; ++ if (exact || !is_hash_key(c, key) || *n != -1) { ++ dbg_tnc("found %d, lvl %d, n %d", exact, znode->level, *n); ++ return exact; ++ } ++ ++ /* ++ * See huge comment at 'lookup_level0_dirty()' what is the rest of the ++ * code. 
++ */ ++ err = tnc_prev(c, &znode, n); ++ if (err == -ENOENT) { ++ *n = -1; ++ dbg_tnc("found 0, lvl %d, n -1", znode->level); ++ return 0; ++ } ++ if (unlikely(err < 0)) ++ return err; ++ if (keys_cmp(c, key, &znode->zbranch[*n].key)) { ++ *n = -1; ++ dbg_tnc("found 0, lvl %d, n -1", znode->level); ++ return 0; ++ } ++ ++ if (znode->cnext || !ubifs_zn_dirty(znode)) { ++ znode = dirty_cow_bottom_up(c, znode); ++ if (IS_ERR(znode)) ++ return PTR_ERR(znode); ++ } ++ ++ dbg_tnc("found 1, lvl %d, n %d", znode->level, *n); ++ *zn = znode; ++ return 1; ++} ++ ++/** ++ * maybe_leb_gced - determine if a LEB may have been garbage collected. ++ * @c: UBIFS file-system description object ++ * @lnum: LEB number ++ * @gc_seq1: garbage collection sequence number ++ * ++ * This function determines if @lnum may have been garbage collected since ++ * sequence number @gc_seq1. If it may have been then %1 is returned, otherwise ++ * %0 is returned. ++ */ ++static int maybe_leb_gced(struct ubifs_info *c, int lnum, int gc_seq1) ++{ ++ int gc_seq2, gced_lnum; ++ ++ gced_lnum = c->gced_lnum; ++ smp_rmb(); ++ gc_seq2 = c->gc_seq; ++ /* Same seq means no GC */ ++ if (gc_seq1 == gc_seq2) ++ return 0; ++ /* Different by more than 1 means we don't know */ ++ if (gc_seq1 + 1 != gc_seq2) ++ return 1; ++ /* ++ * We have seen the sequence number has increased by 1. Now we need to ++ * be sure we read the right LEB number, so read it again. ++ */ ++ smp_rmb(); ++ if (gced_lnum != c->gced_lnum) ++ return 1; ++ /* Finally we can check lnum */ ++ if (gced_lnum == lnum) ++ return 1; ++ return 0; ++} ++ ++/** ++ * ubifs_tnc_locate - look up a file-system node and return it and its location. ++ * @c: UBIFS file-system description object ++ * @key: node key to lookup ++ * @node: the node is returned here ++ * @lnum: LEB number is returned here ++ * @offs: offset is returned here ++ * ++ * This function look up and reads node with key @key. 
The caller has to make ++ * sure the @node buffer is large enough to fit the node. Returns zero in case ++ * of success, %-ENOENT if the node was not found, and a negative error code in ++ * case of failure. The node location can be returned in @lnum and @offs. ++ */ ++int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, ++ void *node, int *lnum, int *offs) ++{ ++ int found, n, err, safely = 0, gc_seq1; ++ struct ubifs_znode *znode; ++ struct ubifs_zbranch zbr, *zt; ++ ++again: ++ mutex_lock(&c->tnc_mutex); ++ found = ubifs_lookup_level0(c, key, &znode, &n); ++ if (!found) { ++ err = -ENOENT; ++ goto out; ++ } else if (found < 0) { ++ err = found; ++ goto out; ++ } ++ zt = &znode->zbranch[n]; ++ if (lnum) { ++ *lnum = zt->lnum; ++ *offs = zt->offs; ++ } ++ if (is_hash_key(c, key)) { ++ /* ++ * In this case the leaf node cache gets used, so we pass the ++ * address of the zbranch and keep the mutex locked ++ */ ++ err = tnc_read_node_nm(c, zt, node); ++ goto out; ++ } ++ if (safely) { ++ err = ubifs_tnc_read_node(c, zt, node); ++ goto out; ++ } ++ /* Drop the TNC mutex prematurely and race with garbage collection */ ++ zbr = znode->zbranch[n]; ++ gc_seq1 = c->gc_seq; ++ mutex_unlock(&c->tnc_mutex); ++ ++ if (ubifs_get_wbuf(c, zbr.lnum)) { ++ /* We do not GC journal heads */ ++ err = ubifs_tnc_read_node(c, &zbr, node); ++ return err; ++ } ++ ++ err = fallible_read_node(c, key, &zbr, node); ++ if (err <= 0 || maybe_leb_gced(c, zbr.lnum, gc_seq1)) { ++ /* ++ * The node may have been GC'ed out from under us so try again ++ * while keeping the TNC mutex locked. ++ */ ++ safely = 1; ++ goto again; ++ } ++ return 0; ++ ++out: ++ mutex_unlock(&c->tnc_mutex); ++ return err; ++} ++ ++/** ++ * ubifs_tnc_get_bu_keys - lookup keys for bulk-read. ++ * @c: UBIFS file-system description object ++ * @bu: bulk-read parameters and results ++ * ++ * Lookup consecutive data node keys for the same inode that reside ++ * consecutively in the same LEB. 
This function returns zero in case of success ++ * and a negative error code in case of failure. ++ * ++ * Note, if the bulk-read buffer length (@bu->buf_len) is known, this function ++ * makes sure bulk-read nodes fit the buffer. Otherwise, this function prepares ++ * maximum possible amount of nodes for bulk-read. ++ */ ++int ubifs_tnc_get_bu_keys(struct ubifs_info *c, struct bu_info *bu) ++{ ++ int n, err = 0, lnum = -1, uninitialized_var(offs); ++ int uninitialized_var(len); ++ unsigned int block = key_block(c, &bu->key); ++ struct ubifs_znode *znode; ++ ++ bu->cnt = 0; ++ bu->blk_cnt = 0; ++ bu->eof = 0; ++ ++ mutex_lock(&c->tnc_mutex); ++ /* Find first key */ ++ err = ubifs_lookup_level0(c, &bu->key, &znode, &n); ++ if (err < 0) ++ goto out; ++ if (err) { ++ /* Key found */ ++ len = znode->zbranch[n].len; ++ /* The buffer must be big enough for at least 1 node */ ++ if (len > bu->buf_len) { ++ err = -EINVAL; ++ goto out; ++ } ++ /* Add this key */ ++ bu->zbranch[bu->cnt++] = znode->zbranch[n]; ++ bu->blk_cnt += 1; ++ lnum = znode->zbranch[n].lnum; ++ offs = ALIGN(znode->zbranch[n].offs + len, 8); ++ } ++ while (1) { ++ struct ubifs_zbranch *zbr; ++ union ubifs_key *key; ++ unsigned int next_block; ++ ++ /* Find next key */ ++ err = tnc_next(c, &znode, &n); ++ if (err) ++ goto out; ++ zbr = &znode->zbranch[n]; ++ key = &zbr->key; ++ /* See if there is another data key for this file */ ++ if (key_inum(c, key) != key_inum(c, &bu->key) || ++ key_type(c, key) != UBIFS_DATA_KEY) { ++ err = -ENOENT; ++ goto out; ++ } ++ if (lnum < 0) { ++ /* First key found */ ++ lnum = zbr->lnum; ++ offs = ALIGN(zbr->offs + zbr->len, 8); ++ len = zbr->len; ++ if (len > bu->buf_len) { ++ err = -EINVAL; ++ goto out; ++ } ++ } else { ++ /* ++ * The data nodes must be in consecutive positions in ++ * the same LEB. 
++ */ ++ if (zbr->lnum != lnum || zbr->offs != offs) ++ goto out; ++ offs += ALIGN(zbr->len, 8); ++ len = ALIGN(len, 8) + zbr->len; ++ /* Must not exceed buffer length */ ++ if (len > bu->buf_len) ++ goto out; ++ } ++ /* Allow for holes */ ++ next_block = key_block(c, key); ++ bu->blk_cnt += (next_block - block - 1); ++ if (bu->blk_cnt >= UBIFS_MAX_BULK_READ) ++ goto out; ++ block = next_block; ++ /* Add this key */ ++ bu->zbranch[bu->cnt++] = *zbr; ++ bu->blk_cnt += 1; ++ /* See if we have room for more */ ++ if (bu->cnt >= UBIFS_MAX_BULK_READ) ++ goto out; ++ if (bu->blk_cnt >= UBIFS_MAX_BULK_READ) ++ goto out; ++ } ++out: ++ if (err == -ENOENT) { ++ bu->eof = 1; ++ err = 0; ++ } ++ bu->gc_seq = c->gc_seq; ++ mutex_unlock(&c->tnc_mutex); ++ if (err) ++ return err; ++ /* ++ * An enormous hole could cause bulk-read to encompass too many ++ * page cache pages, so limit the number here. ++ */ ++ if (bu->blk_cnt > UBIFS_MAX_BULK_READ) ++ bu->blk_cnt = UBIFS_MAX_BULK_READ; ++ /* ++ * Ensure that bulk-read covers a whole number of page cache ++ * pages. ++ */ ++ if (UBIFS_BLOCKS_PER_PAGE == 1 || ++ !(bu->blk_cnt & (UBIFS_BLOCKS_PER_PAGE - 1))) ++ return 0; ++ if (bu->eof) { ++ /* At the end of file we can round up */ ++ bu->blk_cnt += UBIFS_BLOCKS_PER_PAGE - 1; ++ return 0; ++ } ++ /* Exclude data nodes that do not make up a whole page cache page */ ++ block = key_block(c, &bu->key) + bu->blk_cnt; ++ block &= ~(UBIFS_BLOCKS_PER_PAGE - 1); ++ while (bu->cnt) { ++ if (key_block(c, &bu->zbranch[bu->cnt - 1].key) < block) ++ break; ++ bu->cnt -= 1; ++ } ++ return 0; ++} ++ ++/** ++ * read_wbuf - bulk-read from a LEB with a wbuf. ++ * @wbuf: wbuf that may overlap the read ++ * @buf: buffer into which to read ++ * @len: read length ++ * @lnum: LEB number from which to read ++ * @offs: offset from which to read ++ * ++ * This functions returns %0 on success or a negative error code on failure. 
++ */ ++static int read_wbuf(struct ubifs_wbuf *wbuf, void *buf, int len, int lnum, ++ int offs) ++{ ++ const struct ubifs_info *c = wbuf->c; ++ int rlen, overlap; ++ ++ dbg_io("LEB %d:%d, length %d", lnum, offs, len); ++ ubifs_assert(wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0); ++ ubifs_assert(!(offs & 7) && offs < c->leb_size); ++ ubifs_assert(offs + len <= c->leb_size); ++ ++ spin_lock(&wbuf->lock); ++ overlap = (lnum == wbuf->lnum && offs + len > wbuf->offs); ++ if (!overlap) { ++ /* We may safely unlock the write-buffer and read the data */ ++ spin_unlock(&wbuf->lock); ++ return ubi_read(c->ubi, lnum, buf, offs, len); ++ } ++ ++ /* Don't read under wbuf */ ++ rlen = wbuf->offs - offs; ++ if (rlen < 0) ++ rlen = 0; ++ ++ /* Copy the rest from the write-buffer */ ++ memcpy(buf + rlen, wbuf->buf + offs + rlen - wbuf->offs, len - rlen); ++ spin_unlock(&wbuf->lock); ++ ++ if (rlen > 0) ++ /* Read everything that goes before write-buffer */ ++ return ubi_read(c->ubi, lnum, buf, offs, rlen); ++ ++ return 0; ++} ++ ++/** ++ * validate_data_node - validate data nodes for bulk-read. ++ * @c: UBIFS file-system description object ++ * @buf: buffer containing data node to validate ++ * @zbr: zbranch of data node to validate ++ * ++ * This functions returns %0 on success or a negative error code on failure. 
++ */ ++static int validate_data_node(struct ubifs_info *c, void *buf, ++ struct ubifs_zbranch *zbr) ++{ ++ union ubifs_key key1; ++ struct ubifs_ch *ch = buf; ++ int err, len; ++ ++ if (ch->node_type != UBIFS_DATA_NODE) { ++ ubifs_err("bad node type (%d but expected %d)", ++ ch->node_type, UBIFS_DATA_NODE); ++ goto out_err; ++ } ++ ++ err = ubifs_check_node(c, buf, zbr->lnum, zbr->offs, 0, 0); ++ if (err) { ++ ubifs_err("expected node type %d", UBIFS_DATA_NODE); ++ goto out; ++ } ++ ++ len = le32_to_cpu(ch->len); ++ if (len != zbr->len) { ++ ubifs_err("bad node length %d, expected %d", len, zbr->len); ++ goto out_err; ++ } ++ ++ /* Make sure the key of the read node is correct */ ++ key_read(c, buf + UBIFS_KEY_OFFSET, &key1); ++ if (!keys_eq(c, &zbr->key, &key1)) { ++ ubifs_err("bad key in node at LEB %d:%d", ++ zbr->lnum, zbr->offs); ++ dbg_tnc("looked for key %s found node's key %s", ++ DBGKEY(&zbr->key), DBGKEY1(&key1)); ++ goto out_err; ++ } ++ ++ return 0; ++ ++out_err: ++ err = -EINVAL; ++out: ++ ubifs_err("bad node at LEB %d:%d", zbr->lnum, zbr->offs); ++ dbg_dump_node(c, buf); ++ dbg_dump_stack(); ++ return err; ++} ++ ++/** ++ * ubifs_tnc_bulk_read - read a number of data nodes in one go. ++ * @c: UBIFS file-system description object ++ * @bu: bulk-read parameters and results ++ * ++ * This functions reads and validates the data nodes that were identified by the ++ * 'ubifs_tnc_get_bu_keys()' function. This functions returns %0 on success, ++ * -EAGAIN to indicate a race with GC, or another negative error code on ++ * failure. 
++ */ ++int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu) ++{ ++ int lnum = bu->zbranch[0].lnum, offs = bu->zbranch[0].offs, len, err, i; ++ struct ubifs_wbuf *wbuf; ++ void *buf; ++ ++ len = bu->zbranch[bu->cnt - 1].offs; ++ len += bu->zbranch[bu->cnt - 1].len - offs; ++ if (len > bu->buf_len) { ++ ubifs_err("buffer too small %d vs %d", bu->buf_len, len); ++ return -EINVAL; ++ } ++ ++ /* Do the read */ ++ wbuf = ubifs_get_wbuf(c, lnum); ++ if (wbuf) ++ err = read_wbuf(wbuf, bu->buf, len, lnum, offs); ++ else ++ err = ubi_read(c->ubi, lnum, bu->buf, offs, len); ++ ++ /* Check for a race with GC */ ++ if (maybe_leb_gced(c, lnum, bu->gc_seq)) ++ return -EAGAIN; ++ ++ if (err && err != -EBADMSG) { ++ ubifs_err("failed to read from LEB %d:%d, error %d", ++ lnum, offs, err); ++ dbg_dump_stack(); ++ dbg_tnc("key %s", DBGKEY(&bu->key)); ++ return err; ++ } ++ ++ /* Validate the nodes read */ ++ buf = bu->buf; ++ for (i = 0; i < bu->cnt; i++) { ++ err = validate_data_node(c, buf, &bu->zbranch[i]); ++ if (err) ++ return err; ++ buf = buf + ALIGN(bu->zbranch[i].len, 8); ++ } ++ ++ return 0; ++} ++ ++/** ++ * do_lookup_nm- look up a "hashed" node. ++ * @c: UBIFS file-system description object ++ * @key: node key to lookup ++ * @node: the node is returned here ++ * @nm: node name ++ * ++ * This function look up and reads a node which contains name hash in the key. ++ * Since the hash may have collisions, there may be many nodes with the same ++ * key, so we have to sequentially look to all of them until the needed one is ++ * found. This function returns zero in case of success, %-ENOENT if the node ++ * was not found, and a negative error code in case of failure. 
++ */ ++static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, ++ void *node, const struct qstr *nm) ++{ ++ int found, n, err; ++ struct ubifs_znode *znode; ++ ++ dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key)); ++ mutex_lock(&c->tnc_mutex); ++ found = ubifs_lookup_level0(c, key, &znode, &n); ++ if (!found) { ++ err = -ENOENT; ++ goto out_unlock; ++ } else if (found < 0) { ++ err = found; ++ goto out_unlock; ++ } ++ ++ ubifs_assert(n >= 0); ++ ++ err = resolve_collision(c, key, &znode, &n, nm); ++ dbg_tnc("rc returned %d, znode %p, n %d", err, znode, n); ++ if (unlikely(err < 0)) ++ goto out_unlock; ++ if (err == 0) { ++ err = -ENOENT; ++ goto out_unlock; ++ } ++ ++ err = tnc_read_node_nm(c, &znode->zbranch[n], node); ++ ++out_unlock: ++ mutex_unlock(&c->tnc_mutex); ++ return err; ++} ++ ++/** ++ * ubifs_tnc_lookup_nm - look up a "hashed" node. ++ * @c: UBIFS file-system description object ++ * @key: node key to lookup ++ * @node: the node is returned here ++ * @nm: node name ++ * ++ * This function look up and reads a node which contains name hash in the key. ++ * Since the hash may have collisions, there may be many nodes with the same ++ * key, so we have to sequentially look to all of them until the needed one is ++ * found. This function returns zero in case of success, %-ENOENT if the node ++ * was not found, and a negative error code in case of failure. ++ */ ++int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, ++ void *node, const struct qstr *nm) ++{ ++ int err, len; ++ const struct ubifs_dent_node *dent = node; ++ ++ /* ++ * We assume that in most of the cases there are no name collisions and ++ * 'ubifs_tnc_lookup()' returns us the right direntry. 
++ */ ++ err = ubifs_tnc_lookup(c, key, node); ++ if (err) ++ return err; ++ ++ len = le16_to_cpu(dent->nlen); ++ if (nm->len == len && !memcmp(dent->name, nm->name, len)) ++ return 0; ++ ++ /* ++ * Unluckily, there are hash collisions and we have to iterate over ++ * them look at each direntry with colliding name hash sequentially. ++ */ ++ return do_lookup_nm(c, key, node, nm); ++} ++ ++/** ++ * correct_parent_keys - correct parent znodes' keys. ++ * @c: UBIFS file-system description object ++ * @znode: znode to correct parent znodes for ++ * ++ * This is a helper function for 'tnc_insert()'. When the key of the leftmost ++ * zbranch changes, keys of parent znodes have to be corrected. This helper ++ * function is called in such situations and corrects the keys if needed. ++ */ ++static void correct_parent_keys(const struct ubifs_info *c, ++ struct ubifs_znode *znode) ++{ ++ union ubifs_key *key, *key1; ++ ++ ubifs_assert(znode->parent); ++ ubifs_assert(znode->iip == 0); ++ ++ key = &znode->zbranch[0].key; ++ key1 = &znode->parent->zbranch[0].key; ++ ++ while (keys_cmp(c, key, key1) < 0) { ++ key_copy(c, key, key1); ++ znode = znode->parent; ++ znode->alt = 1; ++ if (!znode->parent || znode->iip) ++ break; ++ key1 = &znode->parent->zbranch[0].key; ++ } ++} ++ ++/** ++ * insert_zbranch - insert a zbranch into a znode. ++ * @znode: znode into which to insert ++ * @zbr: zbranch to insert ++ * @n: slot number to insert to ++ * ++ * This is a helper function for 'tnc_insert()'. UBIFS does not allow "gaps" in ++ * znode's array of zbranches and keeps zbranches consolidated, so when a new ++ * zbranch has to be inserted to the @znode->zbranches[]' array at the @n-th ++ * slot, zbranches starting from @n have to be moved right. 
++ */ ++static void insert_zbranch(struct ubifs_znode *znode, ++ const struct ubifs_zbranch *zbr, int n) ++{ ++ int i; ++ ++ ubifs_assert(ubifs_zn_dirty(znode)); ++ ++ if (znode->level) { ++ for (i = znode->child_cnt; i > n; i--) { ++ znode->zbranch[i] = znode->zbranch[i - 1]; ++ if (znode->zbranch[i].znode) ++ znode->zbranch[i].znode->iip = i; ++ } ++ if (zbr->znode) ++ zbr->znode->iip = n; ++ } else ++ for (i = znode->child_cnt; i > n; i--) ++ znode->zbranch[i] = znode->zbranch[i - 1]; ++ ++ znode->zbranch[n] = *zbr; ++ znode->child_cnt += 1; ++ ++ /* ++ * After inserting at slot zero, the lower bound of the key range of ++ * this znode may have changed. If this znode is subsequently split ++ * then the upper bound of the key range may change, and furthermore ++ * it could change to be lower than the original lower bound. If that ++ * happens, then it will no longer be possible to find this znode in the ++ * TNC using the key from the index node on flash. That is bad because ++ * if it is not found, we will assume it is obsolete and may overwrite ++ * it. Then if there is an unclean unmount, we will start using the ++ * old index which will be broken. ++ * ++ * So we first mark znodes that have insertions at slot zero, and then ++ * if they are split we add their lnum/offs to the old_idx tree. ++ */ ++ if (n == 0) ++ znode->alt = 1; ++} ++ ++/** ++ * tnc_insert - insert a node into TNC. ++ * @c: UBIFS file-system description object ++ * @znode: znode to insert into ++ * @zbr: branch to insert ++ * @n: slot number to insert new zbranch to ++ * ++ * This function inserts a new node described by @zbr into znode @znode. If ++ * znode does not have a free slot for new zbranch, it is split. Parent znodes ++ * are splat as well if needed. Returns zero in case of success or a negative ++ * error code in case of failure. 
++ */ ++static int tnc_insert(struct ubifs_info *c, struct ubifs_znode *znode, ++ struct ubifs_zbranch *zbr, int n) ++{ ++ struct ubifs_znode *zn, *zi, *zp; ++ int i, keep, move, appending = 0; ++ union ubifs_key *key = &zbr->key, *key1; ++ ++ ubifs_assert(n >= 0 && n <= c->fanout); ++ ++ /* Implement naive insert for now */ ++again: ++ zp = znode->parent; ++ if (znode->child_cnt < c->fanout) { ++ ubifs_assert(n != c->fanout); ++ dbg_tnc("inserted at %d level %d, key %s", n, znode->level, ++ DBGKEY(key)); ++ ++ insert_zbranch(znode, zbr, n); ++ ++ /* Ensure parent's key is correct */ ++ if (n == 0 && zp && znode->iip == 0) ++ correct_parent_keys(c, znode); ++ ++ return 0; ++ } ++ ++ /* ++ * Unfortunately, @znode does not have more empty slots and we have to ++ * split it. ++ */ ++ dbg_tnc("splitting level %d, key %s", znode->level, DBGKEY(key)); ++ ++ if (znode->alt) ++ /* ++ * We can no longer be sure of finding this znode by key, so we ++ * record it in the old_idx tree. ++ */ ++ ins_clr_old_idx_znode(c, znode); ++ ++ zn = kzalloc(c->max_znode_sz, GFP_NOFS); ++ if (!zn) ++ return -ENOMEM; ++ zn->parent = zp; ++ zn->level = znode->level; ++ ++ /* Decide where to split */ ++ if (znode->level == 0 && key_type(c, key) == UBIFS_DATA_KEY) { ++ /* Try not to split consecutive data keys */ ++ if (n == c->fanout) { ++ key1 = &znode->zbranch[n - 1].key; ++ if (key_inum(c, key1) == key_inum(c, key) && ++ key_type(c, key1) == UBIFS_DATA_KEY) ++ appending = 1; ++ } else ++ goto check_split; ++ } else if (appending && n != c->fanout) { ++ /* Try not to split consecutive data keys */ ++ appending = 0; ++check_split: ++ if (n >= (c->fanout + 1) / 2) { ++ key1 = &znode->zbranch[0].key; ++ if (key_inum(c, key1) == key_inum(c, key) && ++ key_type(c, key1) == UBIFS_DATA_KEY) { ++ key1 = &znode->zbranch[n].key; ++ if (key_inum(c, key1) != key_inum(c, key) || ++ key_type(c, key1) != UBIFS_DATA_KEY) { ++ keep = n; ++ move = c->fanout - keep; ++ zi = znode; ++ goto do_split; ++ } ++ } 
++ } ++ } ++ ++ if (appending) { ++ keep = c->fanout; ++ move = 0; ++ } else { ++ keep = (c->fanout + 1) / 2; ++ move = c->fanout - keep; ++ } ++ ++ /* ++ * Although we don't at present, we could look at the neighbors and see ++ * if we can move some zbranches there. ++ */ ++ ++ if (n < keep) { ++ /* Insert into existing znode */ ++ zi = znode; ++ move += 1; ++ keep -= 1; ++ } else { ++ /* Insert into new znode */ ++ zi = zn; ++ n -= keep; ++ /* Re-parent */ ++ if (zn->level != 0) ++ zbr->znode->parent = zn; ++ } ++ ++do_split: ++ ++ __set_bit(DIRTY_ZNODE, &zn->flags); ++ atomic_long_inc(&c->dirty_zn_cnt); ++ ++ zn->child_cnt = move; ++ znode->child_cnt = keep; ++ ++ dbg_tnc("moving %d, keeping %d", move, keep); ++ ++ /* Move zbranch */ ++ for (i = 0; i < move; i++) { ++ zn->zbranch[i] = znode->zbranch[keep + i]; ++ /* Re-parent */ ++ if (zn->level != 0) ++ if (zn->zbranch[i].znode) { ++ zn->zbranch[i].znode->parent = zn; ++ zn->zbranch[i].znode->iip = i; ++ } ++ } ++ ++ /* Insert new key and branch */ ++ dbg_tnc("inserting at %d level %d, key %s", n, zn->level, DBGKEY(key)); ++ ++ insert_zbranch(zi, zbr, n); ++ ++ /* Insert new znode (produced by spitting) into the parent */ ++ if (zp) { ++ if (n == 0 && zi == znode && znode->iip == 0) ++ correct_parent_keys(c, znode); ++ ++ /* Locate insertion point */ ++ n = znode->iip + 1; ++ ++ /* Tail recursion */ ++ zbr->key = zn->zbranch[0].key; ++ zbr->znode = zn; ++ zbr->lnum = 0; ++ zbr->offs = 0; ++ zbr->len = 0; ++ znode = zp; ++ ++ goto again; ++ } ++ ++ /* We have to split root znode */ ++ dbg_tnc("creating new zroot at level %d", znode->level + 1); ++ ++ zi = kzalloc(c->max_znode_sz, GFP_NOFS); ++ if (!zi) ++ return -ENOMEM; ++ ++ zi->child_cnt = 2; ++ zi->level = znode->level + 1; ++ ++ __set_bit(DIRTY_ZNODE, &zi->flags); ++ atomic_long_inc(&c->dirty_zn_cnt); ++ ++ zi->zbranch[0].key = znode->zbranch[0].key; ++ zi->zbranch[0].znode = znode; ++ zi->zbranch[0].lnum = c->zroot.lnum; ++ zi->zbranch[0].offs = 
c->zroot.offs; ++ zi->zbranch[0].len = c->zroot.len; ++ zi->zbranch[1].key = zn->zbranch[0].key; ++ zi->zbranch[1].znode = zn; ++ ++ c->zroot.lnum = 0; ++ c->zroot.offs = 0; ++ c->zroot.len = 0; ++ c->zroot.znode = zi; ++ ++ zn->parent = zi; ++ zn->iip = 1; ++ znode->parent = zi; ++ znode->iip = 0; ++ ++ return 0; ++} ++ ++/** ++ * ubifs_tnc_add - add a node to TNC. ++ * @c: UBIFS file-system description object ++ * @key: key to add ++ * @lnum: LEB number of node ++ * @offs: node offset ++ * @len: node length ++ * ++ * This function adds a node with key @key to TNC. The node may be new or it may ++ * obsolete some existing one. Returns %0 on success or negative error code on ++ * failure. ++ */ ++int ubifs_tnc_add(struct ubifs_info *c, const union ubifs_key *key, int lnum, ++ int offs, int len) ++{ ++ int found, n, err = 0; ++ struct ubifs_znode *znode; ++ ++ mutex_lock(&c->tnc_mutex); ++ dbg_tnc("%d:%d, len %d, key %s", lnum, offs, len, DBGKEY(key)); ++ found = lookup_level0_dirty(c, key, &znode, &n); ++ if (!found) { ++ struct ubifs_zbranch zbr; ++ ++ zbr.znode = NULL; ++ zbr.lnum = lnum; ++ zbr.offs = offs; ++ zbr.len = len; ++ key_copy(c, key, &zbr.key); ++ err = tnc_insert(c, znode, &zbr, n + 1); ++ } else if (found == 1) { ++ struct ubifs_zbranch *zbr = &znode->zbranch[n]; ++ ++ lnc_free(zbr); ++ err = ubifs_add_dirt(c, zbr->lnum, zbr->len); ++ zbr->lnum = lnum; ++ zbr->offs = offs; ++ zbr->len = len; ++ } else ++ err = found; ++ if (!err) ++ err = dbg_check_tnc(c, 0); ++ mutex_unlock(&c->tnc_mutex); ++ ++ return err; ++} ++ ++/** ++ * ubifs_tnc_replace - replace a node in the TNC only if the old node is found. ++ * @c: UBIFS file-system description object ++ * @key: key to add ++ * @old_lnum: LEB number of old node ++ * @old_offs: old node offset ++ * @lnum: LEB number of node ++ * @offs: node offset ++ * @len: node length ++ * ++ * This function replaces a node with key @key in the TNC only if the old node ++ * is found. 
This function is called by garbage collection when node are moved. ++ * Returns %0 on success or negative error code on failure. ++ */ ++int ubifs_tnc_replace(struct ubifs_info *c, const union ubifs_key *key, ++ int old_lnum, int old_offs, int lnum, int offs, int len) ++{ ++ int found, n, err = 0; ++ struct ubifs_znode *znode; ++ ++ mutex_lock(&c->tnc_mutex); ++ dbg_tnc("old LEB %d:%d, new LEB %d:%d, len %d, key %s", old_lnum, ++ old_offs, lnum, offs, len, DBGKEY(key)); ++ found = lookup_level0_dirty(c, key, &znode, &n); ++ if (found < 0) { ++ err = found; ++ goto out_unlock; ++ } ++ ++ if (found == 1) { ++ struct ubifs_zbranch *zbr = &znode->zbranch[n]; ++ ++ found = 0; ++ if (zbr->lnum == old_lnum && zbr->offs == old_offs) { ++ lnc_free(zbr); ++ err = ubifs_add_dirt(c, zbr->lnum, zbr->len); ++ if (err) ++ goto out_unlock; ++ zbr->lnum = lnum; ++ zbr->offs = offs; ++ zbr->len = len; ++ found = 1; ++ } else if (is_hash_key(c, key)) { ++ found = resolve_collision_directly(c, key, &znode, &n, ++ old_lnum, old_offs); ++ dbg_tnc("rc returned %d, znode %p, n %d, LEB %d:%d", ++ found, znode, n, old_lnum, old_offs); ++ if (found < 0) { ++ err = found; ++ goto out_unlock; ++ } ++ ++ if (found) { ++ /* Ensure the znode is dirtied */ ++ if (znode->cnext || !ubifs_zn_dirty(znode)) { ++ znode = dirty_cow_bottom_up(c, znode); ++ if (IS_ERR(znode)) { ++ err = PTR_ERR(znode); ++ goto out_unlock; ++ } ++ } ++ zbr = &znode->zbranch[n]; ++ lnc_free(zbr); ++ err = ubifs_add_dirt(c, zbr->lnum, ++ zbr->len); ++ if (err) ++ goto out_unlock; ++ zbr->lnum = lnum; ++ zbr->offs = offs; ++ zbr->len = len; ++ } ++ } ++ } ++ ++ if (!found) ++ err = ubifs_add_dirt(c, lnum, len); ++ ++ if (!err) ++ err = dbg_check_tnc(c, 0); ++ ++out_unlock: ++ mutex_unlock(&c->tnc_mutex); ++ return err; ++} ++ ++/** ++ * ubifs_tnc_add_nm - add a "hashed" node to TNC. 
++ * @c: UBIFS file-system description object ++ * @key: key to add ++ * @lnum: LEB number of node ++ * @offs: node offset ++ * @len: node length ++ * @nm: node name ++ * ++ * This is the same as 'ubifs_tnc_add()' but it should be used with keys which ++ * may have collisions, like directory entry keys. ++ */ ++int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key, ++ int lnum, int offs, int len, const struct qstr *nm) ++{ ++ int found, n, err = 0; ++ struct ubifs_znode *znode; ++ ++ mutex_lock(&c->tnc_mutex); ++ dbg_tnc("LEB %d:%d, name '%.*s', key %s", lnum, offs, nm->len, nm->name, ++ DBGKEY(key)); ++ found = lookup_level0_dirty(c, key, &znode, &n); ++ if (found < 0) { ++ err = found; ++ goto out_unlock; ++ } ++ ++ if (found == 1) { ++ if (c->replaying) ++ found = fallible_resolve_collision(c, key, &znode, &n, ++ nm, 1); ++ else ++ found = resolve_collision(c, key, &znode, &n, nm); ++ dbg_tnc("rc returned %d, znode %p, n %d", found, znode, n); ++ if (found < 0) { ++ err = found; ++ goto out_unlock; ++ } ++ ++ /* Ensure the znode is dirtied */ ++ if (znode->cnext || !ubifs_zn_dirty(znode)) { ++ znode = dirty_cow_bottom_up(c, znode); ++ if (IS_ERR(znode)) { ++ err = PTR_ERR(znode); ++ goto out_unlock; ++ } ++ } ++ ++ if (found == 1) { ++ struct ubifs_zbranch *zbr = &znode->zbranch[n]; ++ ++ lnc_free(zbr); ++ err = ubifs_add_dirt(c, zbr->lnum, zbr->len); ++ zbr->lnum = lnum; ++ zbr->offs = offs; ++ zbr->len = len; ++ goto out_unlock; ++ } ++ } ++ ++ if (!found) { ++ struct ubifs_zbranch zbr; ++ ++ zbr.znode = NULL; ++ zbr.lnum = lnum; ++ zbr.offs = offs; ++ zbr.len = len; ++ key_copy(c, key, &zbr.key); ++ err = tnc_insert(c, znode, &zbr, n + 1); ++ if (err) ++ goto out_unlock; ++ if (c->replaying) { ++ /* ++ * We did not find it in the index so there may be a ++ * dangling branch still in the index. So we remove it ++ * by passing 'ubifs_tnc_remove_nm()' the same key but ++ * an unmatchable name. 
++ */ ++ struct qstr noname = { .len = 0, .name = "" }; ++ ++ err = dbg_check_tnc(c, 0); ++ mutex_unlock(&c->tnc_mutex); ++ if (err) ++ return err; ++ return ubifs_tnc_remove_nm(c, key, &noname); ++ } ++ } ++ ++out_unlock: ++ if (!err) ++ err = dbg_check_tnc(c, 0); ++ mutex_unlock(&c->tnc_mutex); ++ return err; ++} ++ ++/** ++ * tnc_delete - delete a leaf node from TNC. ++ * @c: UBIFS file-system description object ++ * @znode: znode to delete from ++ * @n: zbranch slot number to delete ++ * ++ * This function deletes a leaf node from @n-th slot of @znode. Returns zero in ++ * case of success and a negative error code in case of failure. ++ */ ++static int tnc_delete(struct ubifs_info *c, struct ubifs_znode *znode, int n) ++{ ++ struct ubifs_zbranch *zbr; ++ struct ubifs_znode *zp; ++ int i, err; ++ ++ /* Delete without merge for now */ ++ ubifs_assert(znode->level == 0); ++ ubifs_assert(n >= 0 && n < c->fanout); ++ dbg_tnc("deleting %s", DBGKEY(&znode->zbranch[n].key)); ++ ++ zbr = &znode->zbranch[n]; ++ lnc_free(zbr); ++ ++ err = ubifs_add_dirt(c, zbr->lnum, zbr->len); ++ if (err) { ++ dbg_dump_znode(c, znode); ++ return err; ++ } ++ ++ /* We do not "gap" zbranch slots */ ++ for (i = n; i < znode->child_cnt - 1; i++) ++ znode->zbranch[i] = znode->zbranch[i + 1]; ++ znode->child_cnt -= 1; ++ ++ if (znode->child_cnt > 0) ++ return 0; ++ ++ /* ++ * This was the last zbranch, we have to delete this znode from the ++ * parent.
++ */ ++ ++ do { ++ ubifs_assert(!test_bit(OBSOLETE_ZNODE, &znode->flags)); ++ ubifs_assert(ubifs_zn_dirty(znode)); ++ ++ zp = znode->parent; ++ n = znode->iip; ++ ++ atomic_long_dec(&c->dirty_zn_cnt); ++ ++ err = insert_old_idx_znode(c, znode); ++ if (err) ++ return err; ++ ++ if (znode->cnext) { ++ __set_bit(OBSOLETE_ZNODE, &znode->flags); ++ atomic_long_inc(&c->clean_zn_cnt); ++ atomic_long_inc(&ubifs_clean_zn_cnt); ++ } else ++ kfree(znode); ++ znode = zp; ++ } while (znode->child_cnt == 1); /* while removing last child */ ++ ++ /* Remove from znode, entry n - 1 */ ++ znode->child_cnt -= 1; ++ ubifs_assert(znode->level != 0); ++ for (i = n; i < znode->child_cnt; i++) { ++ znode->zbranch[i] = znode->zbranch[i + 1]; ++ if (znode->zbranch[i].znode) ++ znode->zbranch[i].znode->iip = i; ++ } ++ ++ /* ++ * If this is the root and it has only 1 child then ++ * collapse the tree. ++ */ ++ if (!znode->parent) { ++ while (znode->child_cnt == 1 && znode->level != 0) { ++ zp = znode; ++ zbr = &znode->zbranch[0]; ++ znode = get_znode(c, znode, 0); ++ if (IS_ERR(znode)) ++ return PTR_ERR(znode); ++ znode = dirty_cow_znode(c, zbr); ++ if (IS_ERR(znode)) ++ return PTR_ERR(znode); ++ znode->parent = NULL; ++ znode->iip = 0; ++ if (c->zroot.len) { ++ err = insert_old_idx(c, c->zroot.lnum, ++ c->zroot.offs); ++ if (err) ++ return err; ++ } ++ c->zroot.lnum = zbr->lnum; ++ c->zroot.offs = zbr->offs; ++ c->zroot.len = zbr->len; ++ c->zroot.znode = znode; ++ ubifs_assert(!test_bit(OBSOLETE_ZNODE, ++ &zp->flags)); ++ ubifs_assert(test_bit(DIRTY_ZNODE, &zp->flags)); ++ atomic_long_dec(&c->dirty_zn_cnt); ++ ++ if (zp->cnext) { ++ __set_bit(OBSOLETE_ZNODE, &zp->flags); ++ atomic_long_inc(&c->clean_zn_cnt); ++ atomic_long_inc(&ubifs_clean_zn_cnt); ++ } else ++ kfree(zp); ++ } ++ } ++ ++ return 0; ++} ++ ++/** ++ * ubifs_tnc_remove - remove an index entry of a node. 
++ * @c: UBIFS file-system description object ++ * @key: key of node ++ * ++ * Returns %0 on success or negative error code on failure. ++ */ ++int ubifs_tnc_remove(struct ubifs_info *c, const union ubifs_key *key) ++{ ++ int found, n, err = 0; ++ struct ubifs_znode *znode; ++ ++ mutex_lock(&c->tnc_mutex); ++ dbg_tnc("key %s", DBGKEY(key)); ++ found = lookup_level0_dirty(c, key, &znode, &n); ++ if (found < 0) { ++ err = found; ++ goto out_unlock; ++ } ++ if (found == 1) ++ err = tnc_delete(c, znode, n); ++ if (!err) ++ err = dbg_check_tnc(c, 0); ++ ++out_unlock: ++ mutex_unlock(&c->tnc_mutex); ++ return err; ++} ++ ++/** ++ * ubifs_tnc_remove_nm - remove an index entry for a "hashed" node. ++ * @c: UBIFS file-system description object ++ * @key: key of node ++ * @nm: directory entry name ++ * ++ * Returns %0 on success or negative error code on failure. ++ */ ++int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key, ++ const struct qstr *nm) ++{ ++ int n, err; ++ struct ubifs_znode *znode; ++ ++ mutex_lock(&c->tnc_mutex); ++ dbg_tnc("%.*s, key %s", nm->len, nm->name, DBGKEY(key)); ++ err = lookup_level0_dirty(c, key, &znode, &n); ++ if (err < 0) ++ goto out_unlock; ++ ++ if (err) { ++ if (c->replaying) ++ err = fallible_resolve_collision(c, key, &znode, &n, ++ nm, 0); ++ else ++ err = resolve_collision(c, key, &znode, &n, nm); ++ dbg_tnc("rc returned %d, znode %p, n %d", err, znode, n); ++ if (err < 0) ++ goto out_unlock; ++ if (err) { ++ /* Ensure the znode is dirtied */ ++ if (znode->cnext || !ubifs_zn_dirty(znode)) { ++ znode = dirty_cow_bottom_up(c, znode); ++ if (IS_ERR(znode)) { ++ err = PTR_ERR(znode); ++ goto out_unlock; ++ } ++ } ++ err = tnc_delete(c, znode, n); ++ } ++ } ++ ++out_unlock: ++ if (!err) ++ err = dbg_check_tnc(c, 0); ++ mutex_unlock(&c->tnc_mutex); ++ return err; ++} ++ ++/** ++ * key_in_range - determine if a key falls within a range of keys. 
++ * @c: UBIFS file-system description object ++ * @key: key to check ++ * @from_key: lowest key in range ++ * @to_key: highest key in range ++ * ++ * This function returns %1 if the key is in range and %0 otherwise. ++ */ ++static int key_in_range(struct ubifs_info *c, union ubifs_key *key, ++ union ubifs_key *from_key, union ubifs_key *to_key) ++{ ++ if (keys_cmp(c, key, from_key) < 0) ++ return 0; ++ if (keys_cmp(c, key, to_key) > 0) ++ return 0; ++ return 1; ++} ++ ++/** ++ * ubifs_tnc_remove_range - remove index entries in range. ++ * @c: UBIFS file-system description object ++ * @from_key: lowest key to remove ++ * @to_key: highest key to remove ++ * ++ * This function removes index entries starting at @from_key and ending at ++ * @to_key. This function returns zero in case of success and a negative error ++ * code in case of failure. ++ */ ++int ubifs_tnc_remove_range(struct ubifs_info *c, union ubifs_key *from_key, ++ union ubifs_key *to_key) ++{ ++ int i, n, k, err = 0; ++ struct ubifs_znode *znode; ++ union ubifs_key *key; ++ ++ mutex_lock(&c->tnc_mutex); ++ while (1) { ++ /* Find first level 0 znode that contains keys to remove */ ++ err = ubifs_lookup_level0(c, from_key, &znode, &n); ++ if (err < 0) ++ goto out_unlock; ++ ++ if (err) ++ key = from_key; ++ else { ++ err = tnc_next(c, &znode, &n); ++ if (err == -ENOENT) { ++ err = 0; ++ goto out_unlock; ++ } ++ if (err < 0) ++ goto out_unlock; ++ key = &znode->zbranch[n].key; ++ if (!key_in_range(c, key, from_key, to_key)) { ++ err = 0; ++ goto out_unlock; ++ } ++ } ++ ++ /* Ensure the znode is dirtied */ ++ if (znode->cnext || !ubifs_zn_dirty(znode)) { ++ znode = dirty_cow_bottom_up(c, znode); ++ if (IS_ERR(znode)) { ++ err = PTR_ERR(znode); ++ goto out_unlock; ++ } ++ } ++ ++ /* Remove all keys in range except the first */ ++ for (i = n + 1, k = 0; i < znode->child_cnt; i++, k++) { ++ key = &znode->zbranch[i].key; ++ if (!key_in_range(c, key, from_key, to_key)) ++ break; ++ lnc_free(&znode->zbranch[i]); 
++ err = ubifs_add_dirt(c, znode->zbranch[i].lnum, ++ znode->zbranch[i].len); ++ if (err) { ++ dbg_dump_znode(c, znode); ++ goto out_unlock; ++ } ++ dbg_tnc("removing %s", DBGKEY(key)); ++ } ++ if (k) { ++ for (i = n + 1 + k; i < znode->child_cnt; i++) ++ znode->zbranch[i - k] = znode->zbranch[i]; ++ znode->child_cnt -= k; ++ } ++ ++ /* Now delete the first */ ++ err = tnc_delete(c, znode, n); ++ if (err) ++ goto out_unlock; ++ } ++ ++out_unlock: ++ if (!err) ++ err = dbg_check_tnc(c, 0); ++ mutex_unlock(&c->tnc_mutex); ++ return err; ++} ++ ++/** ++ * ubifs_tnc_remove_ino - remove an inode from TNC. ++ * @c: UBIFS file-system description object ++ * @inum: inode number to remove ++ * ++ * This function remove inode @inum and all the extended attributes associated ++ * with the anode from TNC and returns zero in case of success or a negative ++ * error code in case of failure. ++ */ ++int ubifs_tnc_remove_ino(struct ubifs_info *c, ino_t inum) ++{ ++ union ubifs_key key1, key2; ++ struct ubifs_dent_node *xent, *pxent = NULL; ++ struct qstr nm = { .name = NULL }; ++ ++ dbg_tnc("ino %lu", (unsigned long)inum); ++ ++ /* ++ * Walk all extended attribute entries and remove them together with ++ * corresponding extended attribute inodes. 
++ */ ++ lowest_xent_key(c, &key1, inum); ++ while (1) { ++ ino_t xattr_inum; ++ int err; ++ ++ xent = ubifs_tnc_next_ent(c, &key1, &nm); ++ if (IS_ERR(xent)) { ++ err = PTR_ERR(xent); ++ if (err == -ENOENT) ++ break; ++ return err; ++ } ++ ++ xattr_inum = le64_to_cpu(xent->inum); ++ dbg_tnc("xent '%s', ino %lu", xent->name, ++ (unsigned long)xattr_inum); ++ ++ nm.name = xent->name; ++ nm.len = le16_to_cpu(xent->nlen); ++ err = ubifs_tnc_remove_nm(c, &key1, &nm); ++ if (err) { ++ kfree(xent); ++ return err; ++ } ++ ++ lowest_ino_key(c, &key1, xattr_inum); ++ highest_ino_key(c, &key2, xattr_inum); ++ err = ubifs_tnc_remove_range(c, &key1, &key2); ++ if (err) { ++ kfree(xent); ++ return err; ++ } ++ ++ kfree(pxent); ++ pxent = xent; ++ key_read(c, &xent->key, &key1); ++ } ++ ++ kfree(pxent); ++ lowest_ino_key(c, &key1, inum); ++ highest_ino_key(c, &key2, inum); ++ ++ return ubifs_tnc_remove_range(c, &key1, &key2); ++} ++ ++/** ++ * ubifs_tnc_next_ent - walk directory or extended attribute entries. ++ * @c: UBIFS file-system description object ++ * @key: key of last entry ++ * @nm: name of last entry found or %NULL ++ * ++ * This function finds and reads the next directory or extended attribute entry ++ * after the given key (@key) if there is one. @nm is used to resolve ++ * collisions. ++ * ++ * If the name of the current entry is not known and only the key is known, ++ * @nm->name has to be %NULL. In this case the semantics of this function is a ++ * little bit different and it returns the entry corresponding to this key, not ++ * the next one. If the key was not found, the closest "right" entry is ++ * returned. ++ * ++ * If the fist entry has to be found, @key has to contain the lowest possible ++ * key value for this inode and @name has to be %NULL. ++ * ++ * This function returns the found directory or extended attribute entry node ++ * in case of success, %-ENOENT is returned if no entry was found, and a ++ * negative error code is returned in case of failure. 
++ */ ++struct ubifs_dent_node *ubifs_tnc_next_ent(struct ubifs_info *c, ++ union ubifs_key *key, ++ const struct qstr *nm) ++{ ++ int n, err, type = key_type(c, key); ++ struct ubifs_znode *znode; ++ struct ubifs_dent_node *dent; ++ struct ubifs_zbranch *zbr; ++ union ubifs_key *dkey; ++ ++ dbg_tnc("%s %s", nm->name ? (char *)nm->name : "(lowest)", DBGKEY(key)); ++ ubifs_assert(is_hash_key(c, key)); ++ ++ mutex_lock(&c->tnc_mutex); ++ err = ubifs_lookup_level0(c, key, &znode, &n); ++ if (unlikely(err < 0)) ++ goto out_unlock; ++ ++ if (nm->name) { ++ if (err) { ++ /* Handle collisions */ ++ err = resolve_collision(c, key, &znode, &n, nm); ++ dbg_tnc("rc returned %d, znode %p, n %d", ++ err, znode, n); ++ if (unlikely(err < 0)) ++ goto out_unlock; ++ } ++ ++ /* Now find next entry */ ++ err = tnc_next(c, &znode, &n); ++ if (unlikely(err)) ++ goto out_unlock; ++ } else { ++ /* ++ * The full name of the entry was not given, in which case the ++ * behavior of this function is a little different and it ++ * returns current entry, not the next one. ++ */ ++ if (!err) { ++ /* ++ * However, the given key does not exist in the TNC ++ * tree and @znode/@n variables contain the closest ++ * "preceding" element. Switch to the next one. ++ */ ++ err = tnc_next(c, &znode, &n); ++ if (err) ++ goto out_unlock; ++ } ++ } ++ ++ zbr = &znode->zbranch[n]; ++ dent = kmalloc(zbr->len, GFP_NOFS); ++ if (unlikely(!dent)) { ++ err = -ENOMEM; ++ goto out_unlock; ++ } ++ ++ /* ++ * The above 'tnc_next()' call could lead us to the next inode, check ++ * this. 
++ */ ++ dkey = &zbr->key; ++ if (key_inum(c, dkey) != key_inum(c, key) || ++ key_type(c, dkey) != type) { ++ err = -ENOENT; ++ goto out_free; ++ } ++ ++ err = tnc_read_node_nm(c, zbr, dent); ++ if (unlikely(err)) ++ goto out_free; ++ ++ mutex_unlock(&c->tnc_mutex); ++ return dent; ++ ++out_free: ++ kfree(dent); ++out_unlock: ++ mutex_unlock(&c->tnc_mutex); ++ return ERR_PTR(err); ++} ++ ++/** ++ * tnc_destroy_cnext - destroy left-over obsolete znodes from a failed commit. ++ * @c: UBIFS file-system description object ++ * ++ * Destroy left-over obsolete znodes from a failed commit. ++ */ ++static void tnc_destroy_cnext(struct ubifs_info *c) ++{ ++ struct ubifs_znode *cnext; ++ ++ if (!c->cnext) ++ return; ++ ubifs_assert(c->cmt_state == COMMIT_BROKEN); ++ cnext = c->cnext; ++ do { ++ struct ubifs_znode *znode = cnext; ++ ++ cnext = cnext->cnext; ++ if (test_bit(OBSOLETE_ZNODE, &znode->flags)) ++ kfree(znode); ++ } while (cnext && cnext != c->cnext); ++} ++ ++/** ++ * ubifs_tnc_close - close TNC subsystem and free all related resources. ++ * @c: UBIFS file-system description object ++ */ ++void ubifs_tnc_close(struct ubifs_info *c) ++{ ++ long clean_freed; ++ ++ tnc_destroy_cnext(c); ++ if (c->zroot.znode) { ++ clean_freed = ubifs_destroy_tnc_subtree(c->zroot.znode); ++ atomic_long_sub(clean_freed, &ubifs_clean_zn_cnt); ++ } ++ kfree(c->gap_lebs); ++ kfree(c->ilebs); ++ destroy_old_idx(c); ++} ++ ++/** ++ * left_znode - get the znode to the left. ++ * @c: UBIFS file-system description object ++ * @znode: znode ++ * ++ * This function returns a pointer to the znode to the left of @znode or NULL if ++ * there is not one. A negative error code is returned on failure. 
++ */ ++static struct ubifs_znode *left_znode(struct ubifs_info *c, ++ struct ubifs_znode *znode) ++{ ++ int level = znode->level; ++ ++ while (1) { ++ int n = znode->iip - 1; ++ ++ /* Go up until we can go left */ ++ znode = znode->parent; ++ if (!znode) ++ return NULL; ++ if (n >= 0) { ++ /* Now go down the rightmost branch to 'level' */ ++ znode = get_znode(c, znode, n); ++ if (IS_ERR(znode)) ++ return znode; ++ while (znode->level != level) { ++ n = znode->child_cnt - 1; ++ znode = get_znode(c, znode, n); ++ if (IS_ERR(znode)) ++ return znode; ++ } ++ break; ++ } ++ } ++ return znode; ++} ++ ++/** ++ * right_znode - get the znode to the right. ++ * @c: UBIFS file-system description object ++ * @znode: znode ++ * ++ * This function returns a pointer to the znode to the right of @znode or NULL ++ * if there is not one. A negative error code is returned on failure. ++ */ ++static struct ubifs_znode *right_znode(struct ubifs_info *c, ++ struct ubifs_znode *znode) ++{ ++ int level = znode->level; ++ ++ while (1) { ++ int n = znode->iip + 1; ++ ++ /* Go up until we can go right */ ++ znode = znode->parent; ++ if (!znode) ++ return NULL; ++ if (n < znode->child_cnt) { ++ /* Now go down the leftmost branch to 'level' */ ++ znode = get_znode(c, znode, n); ++ if (IS_ERR(znode)) ++ return znode; ++ while (znode->level != level) { ++ znode = get_znode(c, znode, 0); ++ if (IS_ERR(znode)) ++ return znode; ++ } ++ break; ++ } ++ } ++ return znode; ++} ++ ++/** ++ * lookup_znode - find a particular indexing node from TNC. ++ * @c: UBIFS file-system description object ++ * @key: index node key to lookup ++ * @level: index node level ++ * @lnum: index node LEB number ++ * @offs: index node offset ++ * ++ * This function searches an indexing node by its first key @key and its ++ * address @lnum:@offs. It looks up the indexing tree by pulling all indexing ++ * nodes it traverses to TNC.
This function is called for indexing nodes which ++ * were found on the media by scanning, for example when garbage-collecting or ++ * when doing in-the-gaps commit. This means that the indexing node which is ++ * looked for does not have to have exactly the same leftmost key @key, because ++ * the leftmost key may have been changed, in which case TNC will contain a ++ * dirty znode which still refers to the same @lnum:@offs. This function is clever ++ * enough to recognize such indexing nodes. ++ * ++ * Note, if a znode was deleted or changed too much, then this function will ++ * not find it. For situations like this UBIFS has the old index RB-tree ++ * (indexed by @lnum:@offs). ++ * ++ * This function returns a pointer to the znode found or %NULL if it is not ++ * found. A negative error code is returned on failure. ++ */ ++static struct ubifs_znode *lookup_znode(struct ubifs_info *c, ++ union ubifs_key *key, int level, ++ int lnum, int offs) ++{ ++ struct ubifs_znode *znode, *zn; ++ int n, nn; ++ ++ /* ++ * The arguments have probably been read off flash, so don't assume ++ * they are valid. ++ */ ++ if (level < 0) ++ return ERR_PTR(-EINVAL); ++ ++ /* Get the root znode */ ++ znode = c->zroot.znode; ++ if (!znode) { ++ znode = ubifs_load_znode(c, &c->zroot, NULL, 0); ++ if (IS_ERR(znode)) ++ return znode; ++ } ++ /* Check if it is the one we are looking for */ ++ if (c->zroot.lnum == lnum && c->zroot.offs == offs) ++ return znode; ++ /* Descend to the parent level i.e. (level + 1) */ ++ if (level >= znode->level) ++ return NULL; ++ while (1) { ++ ubifs_search_zbranch(c, znode, key, &n); ++ if (n < 0) { ++ /* ++ * We reached a znode where the leftmost key is greater ++ * than the key we are searching for. This is the same ++ * situation as the one described in a huge comment at ++ * the end of the 'ubifs_lookup_level0()' function. And ++ * for exactly the same reasons we have to try to look ++ * left before giving up.
++ */ ++ znode = left_znode(c, znode); ++ if (!znode) ++ return NULL; ++ if (IS_ERR(znode)) ++ return znode; ++ ubifs_search_zbranch(c, znode, key, &n); ++ ubifs_assert(n >= 0); ++ } ++ if (znode->level == level + 1) ++ break; ++ znode = get_znode(c, znode, n); ++ if (IS_ERR(znode)) ++ return znode; ++ } ++ /* Check if the child is the one we are looking for */ ++ if (znode->zbranch[n].lnum == lnum && znode->zbranch[n].offs == offs) ++ return get_znode(c, znode, n); ++ /* If the key is unique, there is nowhere else to look */ ++ if (!is_hash_key(c, key)) ++ return NULL; ++ /* ++ * The key is not unique and so may be also in the znodes to either ++ * side. ++ */ ++ zn = znode; ++ nn = n; ++ /* Look left */ ++ while (1) { ++ /* Move one branch to the left */ ++ if (n) ++ n -= 1; ++ else { ++ znode = left_znode(c, znode); ++ if (!znode) ++ break; ++ if (IS_ERR(znode)) ++ return znode; ++ n = znode->child_cnt - 1; ++ } ++ /* Check it */ ++ if (znode->zbranch[n].lnum == lnum && ++ znode->zbranch[n].offs == offs) ++ return get_znode(c, znode, n); ++ /* Stop if the key is less than the one we are looking for */ ++ if (keys_cmp(c, &znode->zbranch[n].key, key) < 0) ++ break; ++ } ++ /* Back to the middle */ ++ znode = zn; ++ n = nn; ++ /* Look right */ ++ while (1) { ++ /* Move one branch to the right */ ++ if (++n >= znode->child_cnt) { ++ znode = right_znode(c, znode); ++ if (!znode) ++ break; ++ if (IS_ERR(znode)) ++ return znode; ++ n = 0; ++ } ++ /* Check it */ ++ if (znode->zbranch[n].lnum == lnum && ++ znode->zbranch[n].offs == offs) ++ return get_znode(c, znode, n); ++ /* Stop if the key is greater than the one we are looking for */ ++ if (keys_cmp(c, &znode->zbranch[n].key, key) > 0) ++ break; ++ } ++ return NULL; ++} ++ ++/** ++ * is_idx_node_in_tnc - determine if an index node is in the TNC.
++ * @c: UBIFS file-system description object ++ * @key: key of index node ++ * @level: index node level ++ * @lnum: LEB number of index node ++ * @offs: offset of index node ++ * ++ * This function returns %0 if the index node is not referred to in the TNC, %1 ++ * if the index node is referred to in the TNC and the corresponding znode is ++ * dirty, %2 if an index node is referred to in the TNC and the corresponding ++ * znode is clean, and a negative error code in case of failure. ++ * ++ * Note, the @key argument has to be the key of the first child. Also note, ++ * this function relies on the fact that 0:0 is never a valid LEB number and ++ * offset for a main-area node. ++ */ ++int is_idx_node_in_tnc(struct ubifs_info *c, union ubifs_key *key, int level, ++ int lnum, int offs) ++{ ++ struct ubifs_znode *znode; ++ ++ znode = lookup_znode(c, key, level, lnum, offs); ++ if (!znode) ++ return 0; ++ if (IS_ERR(znode)) ++ return PTR_ERR(znode); ++ ++ return ubifs_zn_dirty(znode) ? 1 : 2; ++} ++ ++/** ++ * is_leaf_node_in_tnc - determine if a non-indexing node is in the TNC. ++ * @c: UBIFS file-system description object ++ * @key: node key ++ * @lnum: node LEB number ++ * @offs: node offset ++ * ++ * This function returns %1 if the node is referred to in the TNC, %0 if it is ++ * not, and a negative error code in case of failure. ++ * ++ * Note, this function relies on the fact that 0:0 is never a valid LEB number ++ * and offset for a main-area node.
++ */ ++static int is_leaf_node_in_tnc(struct ubifs_info *c, union ubifs_key *key, ++ int lnum, int offs) ++{ ++ struct ubifs_zbranch *zbr; ++ struct ubifs_znode *znode, *zn; ++ int n, found, err, nn; ++ const int unique = !is_hash_key(c, key); ++ ++ found = ubifs_lookup_level0(c, key, &znode, &n); ++ if (found < 0) ++ return found; /* Error code */ ++ if (!found) ++ return 0; ++ zbr = &znode->zbranch[n]; ++ if (lnum == zbr->lnum && offs == zbr->offs) ++ return 1; /* Found it */ ++ if (unique) ++ return 0; ++ /* ++ * Because the key is not unique, we have to look left ++ * and right as well ++ */ ++ zn = znode; ++ nn = n; ++ /* Look left */ ++ while (1) { ++ err = tnc_prev(c, &znode, &n); ++ if (err == -ENOENT) ++ break; ++ if (err) ++ return err; ++ if (keys_cmp(c, key, &znode->zbranch[n].key)) ++ break; ++ zbr = &znode->zbranch[n]; ++ if (lnum == zbr->lnum && offs == zbr->offs) ++ return 1; /* Found it */ ++ } ++ /* Look right */ ++ znode = zn; ++ n = nn; ++ while (1) { ++ err = tnc_next(c, &znode, &n); ++ if (err) { ++ if (err == -ENOENT) ++ return 0; ++ return err; ++ } ++ if (keys_cmp(c, key, &znode->zbranch[n].key)) ++ break; ++ zbr = &znode->zbranch[n]; ++ if (lnum == zbr->lnum && offs == zbr->offs) ++ return 1; /* Found it */ ++ } ++ return 0; ++} ++ ++/** ++ * ubifs_tnc_has_node - determine whether a node is in the TNC. ++ * @c: UBIFS file-system description object ++ * @key: node key ++ * @level: index node level (if it is an index node) ++ * @lnum: node LEB number ++ * @offs: node offset ++ * @is_idx: non-zero if the node is an index node ++ * ++ * This function returns %1 if the node is in the TNC, %0 if it is not, and a ++ * negative error code in case of failure. For index nodes, @key has to be the ++ * key of the first child. An index node is considered to be in the TNC only if ++ * the corresponding znode is clean or has not been loaded. 
++ */ ++int ubifs_tnc_has_node(struct ubifs_info *c, union ubifs_key *key, int level, ++ int lnum, int offs, int is_idx) ++{ ++ int err; ++ ++ mutex_lock(&c->tnc_mutex); ++ if (is_idx) { ++ err = is_idx_node_in_tnc(c, key, level, lnum, offs); ++ if (err < 0) ++ goto out_unlock; ++ if (err == 1) ++ /* The index node was found but it was dirty */ ++ err = 0; ++ else if (err == 2) ++ /* The index node was found and it was clean */ ++ err = 1; ++ else ++ BUG_ON(err != 0); ++ } else ++ err = is_leaf_node_in_tnc(c, key, lnum, offs); ++ ++out_unlock: ++ mutex_unlock(&c->tnc_mutex); ++ return err; ++} ++ ++/** ++ * ubifs_dirty_idx_node - dirty an index node. ++ * @c: UBIFS file-system description object ++ * @key: index node key ++ * @level: index node level ++ * @lnum: index node LEB number ++ * @offs: index node offset ++ * ++ * This function loads and dirties an index node so that it can be garbage ++ * collected. The @key argument has to be the key of the first child. This ++ * function relies on the fact that 0:0 is never a valid LEB number and offset ++ * for a main-area node. Returns %0 on success and a negative error code on ++ * failure. ++ */ ++int ubifs_dirty_idx_node(struct ubifs_info *c, union ubifs_key *key, int level, ++ int lnum, int offs) ++{ ++ struct ubifs_znode *znode; ++ int err = 0; ++ ++ mutex_lock(&c->tnc_mutex); ++ znode = lookup_znode(c, key, level, lnum, offs); ++ if (!znode) ++ goto out_unlock; ++ if (IS_ERR(znode)) { ++ err = PTR_ERR(znode); ++ goto out_unlock; ++ } ++ znode = dirty_cow_bottom_up(c, znode); ++ if (IS_ERR(znode)) { ++ err = PTR_ERR(znode); ++ goto out_unlock; ++ } ++ ++out_unlock: ++ mutex_unlock(&c->tnc_mutex); ++ return err; ++} +diff -Nurd linux-2.6.24/fs/ubifs/tnc_commit.c ubifs-v2.6.24/fs/ubifs/tnc_commit.c +--- linux-2.6.24/fs/ubifs/tnc_commit.c 1970-01-01 02:00:00.000000000 +0200 ++++ ubifs-v2.6.24/fs/ubifs/tnc_commit.c 2009-04-07 17:14:47.000000000 +0200 +@@ -0,0 +1,1105 @@ ++/* ++ * This file is part of UBIFS. 
++ * ++ * Copyright (C) 2006-2008 Nokia Corporation. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published by ++ * the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, write to the Free Software Foundation, Inc., 51 ++ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ * ++ * Authors: Adrian Hunter ++ * Artem Bityutskiy (Битюцкий Артём) ++ */ ++ ++/* This file implements TNC functions for committing */ ++ ++#include "ubifs.h" ++ ++/** ++ * make_idx_node - make an index node for fill-the-gaps method of TNC commit. ++ * @c: UBIFS file-system description object ++ * @idx: buffer in which to place new index node ++ * @znode: znode from which to make new index node ++ * @lnum: LEB number where new index node will be written ++ * @offs: offset where new index node will be written ++ * @len: length of new index node ++ */ ++static int make_idx_node(struct ubifs_info *c, struct ubifs_idx_node *idx, ++ struct ubifs_znode *znode, int lnum, int offs, int len) ++{ ++ struct ubifs_znode *zp; ++ int i, err; ++ ++ /* Make index node */ ++ idx->ch.node_type = UBIFS_IDX_NODE; ++ idx->child_cnt = cpu_to_le16(znode->child_cnt); ++ idx->level = cpu_to_le16(znode->level); ++ for (i = 0; i < znode->child_cnt; i++) { ++ struct ubifs_branch *br = ubifs_idx_branch(c, idx, i); ++ struct ubifs_zbranch *zbr = &znode->zbranch[i]; ++ ++ key_write_idx(c, &zbr->key, &br->key); ++ br->lnum = cpu_to_le32(zbr->lnum); ++ br->offs = cpu_to_le32(zbr->offs); ++ br->len = cpu_to_le32(zbr->len); ++ if (!zbr->lnum || !zbr->len) { ++ ubifs_err("bad ref in 
znode"); ++ dbg_dump_znode(c, znode); ++ if (zbr->znode) ++ dbg_dump_znode(c, zbr->znode); ++ } ++ } ++ ubifs_prepare_node(c, idx, len, 0); ++ ++#ifdef CONFIG_UBIFS_FS_DEBUG ++ znode->lnum = lnum; ++ znode->offs = offs; ++ znode->len = len; ++#endif ++ ++ err = insert_old_idx_znode(c, znode); ++ ++ /* Update the parent */ ++ zp = znode->parent; ++ if (zp) { ++ struct ubifs_zbranch *zbr; ++ ++ zbr = &zp->zbranch[znode->iip]; ++ zbr->lnum = lnum; ++ zbr->offs = offs; ++ zbr->len = len; ++ } else { ++ c->zroot.lnum = lnum; ++ c->zroot.offs = offs; ++ c->zroot.len = len; ++ } ++ c->calc_idx_sz += ALIGN(len, 8); ++ ++ atomic_long_dec(&c->dirty_zn_cnt); ++ ++ ubifs_assert(ubifs_zn_dirty(znode)); ++ ubifs_assert(test_bit(COW_ZNODE, &znode->flags)); ++ ++ __clear_bit(DIRTY_ZNODE, &znode->flags); ++ __clear_bit(COW_ZNODE, &znode->flags); ++ ++ return err; ++} ++ ++/** ++ * fill_gap - make index nodes in gaps in dirty index LEBs. ++ * @c: UBIFS file-system description object ++ * @lnum: LEB number that gap appears in ++ * @gap_start: offset of start of gap ++ * @gap_end: offset of end of gap ++ * @dirt: adds dirty space to this ++ * ++ * This function returns the number of index nodes written into the gap. 
++ */ ++static int fill_gap(struct ubifs_info *c, int lnum, int gap_start, int gap_end, ++ int *dirt) ++{ ++ int len, gap_remains, gap_pos, written, pad_len; ++ ++ ubifs_assert((gap_start & 7) == 0); ++ ubifs_assert((gap_end & 7) == 0); ++ ubifs_assert(gap_end >= gap_start); ++ ++ gap_remains = gap_end - gap_start; ++ if (!gap_remains) ++ return 0; ++ gap_pos = gap_start; ++ written = 0; ++ while (c->enext) { ++ len = ubifs_idx_node_sz(c, c->enext->child_cnt); ++ if (len < gap_remains) { ++ struct ubifs_znode *znode = c->enext; ++ const int alen = ALIGN(len, 8); ++ int err; ++ ++ ubifs_assert(alen <= gap_remains); ++ err = make_idx_node(c, c->ileb_buf + gap_pos, znode, ++ lnum, gap_pos, len); ++ if (err) ++ return err; ++ gap_remains -= alen; ++ gap_pos += alen; ++ c->enext = znode->cnext; ++ if (c->enext == c->cnext) ++ c->enext = NULL; ++ written += 1; ++ } else ++ break; ++ } ++ if (gap_end == c->leb_size) { ++ c->ileb_len = ALIGN(gap_pos, c->min_io_size); ++ /* Pad to end of min_io_size */ ++ pad_len = c->ileb_len - gap_pos; ++ } else ++ /* Pad to end of gap */ ++ pad_len = gap_remains; ++ dbg_gc("LEB %d:%d to %d len %d nodes written %d wasted bytes %d", ++ lnum, gap_start, gap_end, gap_end - gap_start, written, pad_len); ++ ubifs_pad(c, c->ileb_buf + gap_pos, pad_len); ++ *dirt += pad_len; ++ return written; ++} ++ ++/** ++ * find_old_idx - find an index node obsoleted since the last commit start. ++ * @c: UBIFS file-system description object ++ * @lnum: LEB number of obsoleted index node ++ * @offs: offset of obsoleted index node ++ * ++ * Returns %1 if found and %0 otherwise. 
++ */ ++static int find_old_idx(struct ubifs_info *c, int lnum, int offs) ++{ ++ struct ubifs_old_idx *o; ++ struct rb_node *p; ++ ++ p = c->old_idx.rb_node; ++ while (p) { ++ o = rb_entry(p, struct ubifs_old_idx, rb); ++ if (lnum < o->lnum) ++ p = p->rb_left; ++ else if (lnum > o->lnum) ++ p = p->rb_right; ++ else if (offs < o->offs) ++ p = p->rb_left; ++ else if (offs > o->offs) ++ p = p->rb_right; ++ else ++ return 1; ++ } ++ return 0; ++} ++ ++/** ++ * is_idx_node_in_use - determine if an index node can be overwritten. ++ * @c: UBIFS file-system description object ++ * @key: key of index node ++ * @level: index node level ++ * @lnum: LEB number of index node ++ * @offs: offset of index node ++ * ++ * If @key / @lnum / @offs identify an index node that was not part of the old ++ * index, then this function returns %0 (obsolete). Else if the index node was ++ * part of the old index but is now dirty %1 is returned, else if it is clean %2 ++ * is returned. A negative error code is returned on failure. ++ */ ++static int is_idx_node_in_use(struct ubifs_info *c, union ubifs_key *key, ++ int level, int lnum, int offs) ++{ ++ int ret; ++ ++ ret = is_idx_node_in_tnc(c, key, level, lnum, offs); ++ if (ret < 0) ++ return ret; /* Error code */ ++ if (ret == 0) ++ if (find_old_idx(c, lnum, offs)) ++ return 1; ++ return ret; ++} ++ ++/** ++ * layout_leb_in_gaps - layout index nodes using in-the-gaps method. ++ * @c: UBIFS file-system description object ++ * @p: return LEB number here ++ * ++ * This function lays out new index nodes for dirty znodes using in-the-gaps ++ * method of TNC commit. ++ * This function merely puts the next znode into the next gap, making no attempt ++ * to try to maximise the number of znodes that fit. ++ * This function returns the number of index nodes written into the gaps, or a ++ * negative error code on failure. 
++ */ ++static int layout_leb_in_gaps(struct ubifs_info *c, int *p) ++{ ++ struct ubifs_scan_leb *sleb; ++ struct ubifs_scan_node *snod; ++ int lnum, dirt = 0, gap_start, gap_end, err, written, tot_written; ++ ++ tot_written = 0; ++ /* Get an index LEB with lots of obsolete index nodes */ ++ lnum = ubifs_find_dirty_idx_leb(c); ++ if (lnum < 0) ++ /* ++ * There also may be dirt in the index head that could be ++ * filled, however we do not check there at present. ++ */ ++ return lnum; /* Error code */ ++ *p = lnum; ++ dbg_gc("LEB %d", lnum); ++ /* ++ * Scan the index LEB. We use the generic scan for this even though ++ * it is more comprehensive and less efficient than is needed for this ++ * purpose. ++ */ ++ sleb = ubifs_scan(c, lnum, 0, c->ileb_buf); ++ c->ileb_len = 0; ++ if (IS_ERR(sleb)) ++ return PTR_ERR(sleb); ++ gap_start = 0; ++ list_for_each_entry(snod, &sleb->nodes, list) { ++ struct ubifs_idx_node *idx; ++ int in_use, level; ++ ++ ubifs_assert(snod->type == UBIFS_IDX_NODE); ++ idx = snod->node; ++ key_read(c, ubifs_idx_key(c, idx), &snod->key); ++ level = le16_to_cpu(idx->level); ++ /* Determine if the index node is in use (not obsolete) */ ++ in_use = is_idx_node_in_use(c, &snod->key, level, lnum, ++ snod->offs); ++ if (in_use < 0) { ++ ubifs_scan_destroy(sleb); ++ return in_use; /* Error code */ ++ } ++ if (in_use) { ++ if (in_use == 1) ++ dirt += ALIGN(snod->len, 8); ++ /* ++ * The obsolete index nodes form gaps that can be ++ * overwritten. This gap has ended because we have ++ * found an index node that is still in use ++ * i.e. 
not obsolete ++ */ ++ gap_end = snod->offs; ++ /* Try to fill gap */ ++ written = fill_gap(c, lnum, gap_start, gap_end, &dirt); ++ if (written < 0) { ++ ubifs_scan_destroy(sleb); ++ return written; /* Error code */ ++ } ++ tot_written += written; ++ gap_start = ALIGN(snod->offs + snod->len, 8); ++ } ++ } ++ ubifs_scan_destroy(sleb); ++ c->ileb_len = c->leb_size; ++ gap_end = c->leb_size; ++ /* Try to fill gap */ ++ written = fill_gap(c, lnum, gap_start, gap_end, &dirt); ++ if (written < 0) ++ return written; /* Error code */ ++ tot_written += written; ++ if (tot_written == 0) { ++ struct ubifs_lprops lp; ++ ++ dbg_gc("LEB %d wrote %d index nodes", lnum, tot_written); ++ err = ubifs_read_one_lp(c, lnum, &lp); ++ if (err) ++ return err; ++ if (lp.free == c->leb_size) { ++ /* ++ * We must have snatched this LEB from the idx_gc list ++ * so we need to correct the free and dirty space. ++ */ ++ err = ubifs_change_one_lp(c, lnum, ++ c->leb_size - c->ileb_len, ++ dirt, 0, 0, 0); ++ if (err) ++ return err; ++ } ++ return 0; ++ } ++ err = ubifs_change_one_lp(c, lnum, c->leb_size - c->ileb_len, dirt, ++ 0, 0, 0); ++ if (err) ++ return err; ++ err = ubifs_leb_change(c, lnum, c->ileb_buf, c->ileb_len, ++ UBI_SHORTTERM); ++ if (err) ++ return err; ++ dbg_gc("LEB %d wrote %d index nodes", lnum, tot_written); ++ return tot_written; ++} ++ ++/** ++ * get_leb_cnt - calculate the number of empty LEBs needed to commit. ++ * @c: UBIFS file-system description object ++ * @cnt: number of znodes to commit ++ * ++ * This function returns the number of empty LEBs needed to commit @cnt znodes ++ * to the current index head. The number is not exact and may be more than ++ * needed. ++ */ ++static int get_leb_cnt(struct ubifs_info *c, int cnt) ++{ ++ int d; ++ ++ /* Assume maximum index node size (i.e. 
overestimate space needed) */ ++ cnt -= (c->leb_size - c->ihead_offs) / c->max_idx_node_sz; ++ if (cnt < 0) ++ cnt = 0; ++ d = c->leb_size / c->max_idx_node_sz; ++ return DIV_ROUND_UP(cnt, d); ++} ++ ++/** ++ * layout_in_gaps - in-the-gaps method of committing TNC. ++ * @c: UBIFS file-system description object ++ * @cnt: number of dirty znodes to commit. ++ * ++ * This function lays out new index nodes for dirty znodes using in-the-gaps ++ * method of TNC commit. ++ * ++ * This function returns %0 on success and a negative error code on failure. ++ */ ++static int layout_in_gaps(struct ubifs_info *c, int cnt) ++{ ++ int err, leb_needed_cnt, written, *p; ++ ++ dbg_gc("%d znodes to write", cnt); ++ ++ c->gap_lebs = kmalloc(sizeof(int) * (c->lst.idx_lebs + 1), GFP_NOFS); ++ if (!c->gap_lebs) ++ return -ENOMEM; ++ ++ p = c->gap_lebs; ++ do { ++ ubifs_assert(p < c->gap_lebs + sizeof(int) * c->lst.idx_lebs); ++ written = layout_leb_in_gaps(c, p); ++ if (written < 0) { ++ err = written; ++ if (err != -ENOSPC) { ++ kfree(c->gap_lebs); ++ c->gap_lebs = NULL; ++ return err; ++ } ++ if (!dbg_force_in_the_gaps_enabled) { ++ /* ++ * Do not print scary warnings if the debugging ++ * option which forces in-the-gaps is enabled. ++ */ ++ ubifs_err("out of space"); ++ spin_lock(&c->space_lock); ++ dbg_dump_budg(c); ++ spin_unlock(&c->space_lock); ++ dbg_dump_lprops(c); ++ } ++ /* Try to commit anyway */ ++ err = 0; ++ break; ++ } ++ p++; ++ cnt -= written; ++ leb_needed_cnt = get_leb_cnt(c, cnt); ++ dbg_gc("%d znodes remaining, need %d LEBs, have %d", cnt, ++ leb_needed_cnt, c->ileb_cnt); ++ } while (leb_needed_cnt > c->ileb_cnt); ++ ++ *p = -1; ++ return 0; ++} ++ ++/** ++ * layout_in_empty_space - layout index nodes in empty space. ++ * @c: UBIFS file-system description object ++ * ++ * This function lays out new index nodes for dirty znodes using empty LEBs. ++ * ++ * This function returns %0 on success and a negative error code on failure. 
++ */
++static int layout_in_empty_space(struct ubifs_info *c)
++{
++	struct ubifs_znode *znode, *cnext, *zp;
++	int lnum, offs, len, next_len, buf_len, buf_offs, used, avail;
++	int wlen, blen, err;
++
++	cnext = c->enext;
++	if (!cnext)
++		return 0;
++
++	lnum = c->ihead_lnum;
++	buf_offs = c->ihead_offs;
++
++	buf_len = ubifs_idx_node_sz(c, c->fanout);
++	buf_len = ALIGN(buf_len, c->min_io_size);
++	used = 0;
++	avail = buf_len;
++
++	/* Ensure there is enough room for first write */
++	next_len = ubifs_idx_node_sz(c, cnext->child_cnt);
++	if (buf_offs + next_len > c->leb_size)
++		lnum = -1; /* -1 makes the loop take the next LEB of c->ilebs */
++
++	while (1) {
++		znode = cnext;
++
++		len = ubifs_idx_node_sz(c, znode->child_cnt);
++
++		/* Determine the index node position */
++		if (lnum == -1) {
++			if (c->ileb_nxt >= c->ileb_cnt) {
++				ubifs_err("out of space");
++				return -ENOSPC;
++			}
++			lnum = c->ilebs[c->ileb_nxt++];
++			buf_offs = 0;
++			used = 0;
++			avail = buf_len;
++		}
++
++		offs = buf_offs + used;
++
++#ifdef CONFIG_UBIFS_FS_DEBUG
++		znode->lnum = lnum;
++		znode->offs = offs;
++		znode->len = len;
++#endif
++
++		/* Update the parent */
++		zp = znode->parent;
++		if (zp) {
++			struct ubifs_zbranch *zbr;
++			int i;
++
++			i = znode->iip;
++			zbr = &zp->zbranch[i];
++			zbr->lnum = lnum;
++			zbr->offs = offs;
++			zbr->len = len;
++		} else {
++			c->zroot.lnum = lnum;
++			c->zroot.offs = offs;
++			c->zroot.len = len;
++		}
++		c->calc_idx_sz += ALIGN(len, 8);
++
++		/*
++		 * Once lprops is updated, we can decrease the dirty znode count
++		 * but it is easier to just do it here.
++		 */
++		atomic_long_dec(&c->dirty_zn_cnt);
++
++		/*
++		 * Calculate the next index node length to see if there is
++		 * enough room for it
++		 */
++		cnext = znode->cnext;
++		if (cnext == c->cnext) /* Back at the list head: no more znodes */
++			next_len = 0;
++		else
++			next_len = ubifs_idx_node_sz(c, cnext->child_cnt);
++
++		if (c->min_io_size == 1) {
++			buf_offs += ALIGN(len, 8);
++			if (next_len) {
++				if (buf_offs + next_len <= c->leb_size)
++					continue;
++				err = ubifs_update_one_lp(c, lnum, 0,
++						c->leb_size - buf_offs, 0, 0);
++				if (err)
++					return err;
++				lnum = -1;
++				continue;
++			}
++			err = ubifs_update_one_lp(c, lnum,
++					c->leb_size - buf_offs, 0, 0, 0);
++			if (err)
++				return err;
++			break;
++		}
++
++		/* Update buffer positions */
++		wlen = used + len;
++		used += ALIGN(len, 8);
++		avail -= ALIGN(len, 8);
++
++		if (next_len != 0 &&
++		    buf_offs + used + next_len <= c->leb_size &&
++		    avail > 0)
++			continue;
++
++		if (avail <= 0 && next_len &&
++		    buf_offs + used + next_len <= c->leb_size)
++			blen = buf_len;
++		else
++			blen = ALIGN(wlen, c->min_io_size);
++
++		/* The buffer is full or there are no more znodes to do */
++		buf_offs += blen;
++		if (next_len) {
++			if (buf_offs + next_len > c->leb_size) {
++				err = ubifs_update_one_lp(c, lnum,
++					c->leb_size - buf_offs, blen - used,
++					0, 0);
++				if (err)
++					return err;
++				lnum = -1;
++			}
++			used -= blen;
++			if (used < 0)
++				used = 0;
++			avail = buf_len - used;
++			continue;
++		}
++		err = ubifs_update_one_lp(c, lnum, c->leb_size - buf_offs,
++					  blen - used, 0, 0);
++		if (err)
++			return err;
++		break;
++	}
++
++#ifdef CONFIG_UBIFS_FS_DEBUG
++	c->dbg->new_ihead_lnum = lnum;
++	c->dbg->new_ihead_offs = buf_offs;
++#endif
++
++	return 0;
++}
++
++/**
++ * layout_commit - determine positions of index nodes to commit.
++ * @c: UBIFS file-system description object
++ * @no_space: indicates that insufficient empty LEBs were allocated
++ * @cnt: number of znodes to commit
++ *
++ * Calculate and update the positions of index nodes to commit.
If there were
++ * an insufficient number of empty LEBs allocated, then index nodes are placed
++ * into the gaps created by obsolete index nodes in non-empty index LEBs. For
++ * this purpose, an obsolete index node is one that was not in the index as at
++ * the end of the last commit. To write "in-the-gaps" requires that those index
++ * LEBs are updated atomically in-place.
++ */
++static int layout_commit(struct ubifs_info *c, int no_space, int cnt)
++{
++	int err;
++
++	if (no_space) {
++		err = layout_in_gaps(c, cnt);
++		if (err)
++			return err;
++	}
++	err = layout_in_empty_space(c);
++	return err;
++}
++
++/**
++ * find_first_dirty - find first dirty znode.
++ * @znode: znode to begin searching from
++ */
++static struct ubifs_znode *find_first_dirty(struct ubifs_znode *znode)
++{
++	int i, cont;
++
++	if (!znode)
++		return NULL;
++
++	while (1) {
++		if (znode->level == 0) {
++			if (ubifs_zn_dirty(znode))
++				return znode;
++			return NULL;
++		}
++		cont = 0;
++		for (i = 0; i < znode->child_cnt; i++) {
++			struct ubifs_zbranch *zbr = &znode->zbranch[i];
++
++			if (zbr->znode && ubifs_zn_dirty(zbr->znode)) {
++				znode = zbr->znode; /* Descend into it */
++				cont = 1;
++				break;
++			}
++		}
++		if (!cont) { /* No dirty child found, stop at this znode */
++			if (ubifs_zn_dirty(znode))
++				return znode;
++			return NULL;
++		}
++	}
++}
++
++/**
++ * find_next_dirty - find next dirty znode.
++ * @znode: znode to begin searching from
++ */
++static struct ubifs_znode *find_next_dirty(struct ubifs_znode *znode)
++{
++	int n = znode->iip + 1;
++
++	znode = znode->parent;
++	if (!znode)
++		return NULL;
++	for (; n < znode->child_cnt; n++) {
++		struct ubifs_zbranch *zbr = &znode->zbranch[n];
++
++		if (zbr->znode && ubifs_zn_dirty(zbr->znode))
++			return find_first_dirty(zbr->znode);
++	}
++	return znode; /* No dirty sibling to the right: the parent is next */
++}
++
++/**
++ * get_znodes_to_commit - create list of dirty znodes to commit.
++ * @c: UBIFS file-system description object
++ *
++ * This function returns the number of znodes to commit.
++ */
++static int get_znodes_to_commit(struct ubifs_info *c)
++{
++	struct ubifs_znode *znode, *cnext;
++	int cnt = 0;
++
++	c->cnext = find_first_dirty(c->zroot.znode);
++	znode = c->enext = c->cnext;
++	if (!znode) {
++		dbg_cmt("no znodes to commit");
++		return 0;
++	}
++	cnt += 1;
++	while (1) {
++		ubifs_assert(!test_bit(COW_ZNODE, &znode->flags));
++		__set_bit(COW_ZNODE, &znode->flags);
++		znode->alt = 0;
++		cnext = find_next_dirty(znode);
++		if (!cnext) {
++			znode->cnext = c->cnext; /* Last one links to the first */
++			break;
++		}
++		znode->cnext = cnext;
++		znode = cnext;
++		cnt += 1;
++	}
++	dbg_cmt("committing %d znodes", cnt);
++	ubifs_assert(cnt == atomic_long_read(&c->dirty_zn_cnt));
++	return cnt;
++}
++
++/**
++ * alloc_idx_lebs - allocate empty LEBs to be used to commit.
++ * @c: UBIFS file-system description object
++ * @cnt: number of znodes to commit
++ *
++ * This function returns %-ENOSPC if it cannot allocate a sufficient number of
++ * empty LEBs. %0 is returned on success, otherwise a negative error code
++ * is returned.
++ */
++static int alloc_idx_lebs(struct ubifs_info *c, int cnt)
++{
++	int i, leb_cnt, lnum;
++
++	c->ileb_cnt = 0;
++	c->ileb_nxt = 0;
++	leb_cnt = get_leb_cnt(c, cnt);
++	dbg_cmt("need about %d empty LEBS for TNC commit", leb_cnt);
++	if (!leb_cnt)
++		return 0;
++	c->ilebs = kmalloc(leb_cnt * sizeof(int), GFP_NOFS);
++	if (!c->ilebs)
++		return -ENOMEM;
++	for (i = 0; i < leb_cnt; i++) {
++		lnum = ubifs_find_free_leb_for_idx(c);
++		if (lnum < 0)
++			return lnum; /* c->ilebs is not freed here */
++		c->ilebs[c->ileb_cnt++] = lnum;
++		dbg_cmt("LEB %d", lnum);
++	}
++	if (dbg_force_in_the_gaps())
++		return -ENOSPC;
++	return 0;
++}
++
++/**
++ * free_unused_idx_lebs - free unused LEBs that were allocated for the commit.
++ * @c: UBIFS file-system description object
++ *
++ * It is possible that we allocate more empty LEBs for the commit than we need.
++ * This function frees the surplus.
++ *
++ * This function returns %0 on success and a negative error code on failure.
++ */
++static int free_unused_idx_lebs(struct ubifs_info *c)
++{
++	int i, err = 0, lnum, er;
++
++	for (i = c->ileb_nxt; i < c->ileb_cnt; i++) {
++		lnum = c->ilebs[i];
++		dbg_cmt("LEB %d", lnum);
++		er = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
++					 LPROPS_INDEX | LPROPS_TAKEN, 0);
++		if (!err) /* Keep going, but report the first failure */
++			err = er;
++	}
++	return err;
++}
++
++/**
++ * free_idx_lebs - free unused LEBs and the @c->ilebs array.
++ * @c: UBIFS file-system description object
++ *
++ * This function returns %0 on success and a negative error code on failure.
++ */
++static int free_idx_lebs(struct ubifs_info *c)
++{
++	int err;
++
++	err = free_unused_idx_lebs(c);
++	kfree(c->ilebs);
++	c->ilebs = NULL;
++	return err;
++}
++
++/**
++ * ubifs_tnc_start_commit - start TNC commit.
++ * @c: UBIFS file-system description object
++ * @zroot: new index root position is returned here
++ *
++ * This function prepares the list of indexing nodes to commit and lays out
++ * their positions on flash. If there is not enough free space it uses the
++ * in-gap commit method. Returns zero in case of success and a negative error
++ * code in case of failure.
++ */
++int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot)
++{
++	int err = 0, cnt;
++
++	mutex_lock(&c->tnc_mutex);
++	err = dbg_check_tnc(c, 1);
++	if (err)
++		goto out;
++	cnt = get_znodes_to_commit(c);
++	if (cnt != 0) {
++		int no_space = 0;
++
++		err = alloc_idx_lebs(c, cnt);
++		if (err == -ENOSPC)
++			no_space = 1; /* Makes layout_commit() use the gaps */
++		else if (err)
++			goto out_free;
++		err = layout_commit(c, no_space, cnt);
++		if (err)
++			goto out_free;
++		ubifs_assert(atomic_long_read(&c->dirty_zn_cnt) == 0);
++		err = free_unused_idx_lebs(c);
++		if (err)
++			goto out;
++	}
++	destroy_old_idx(c);
++	memcpy(zroot, &c->zroot, sizeof(struct ubifs_zbranch));
++
++	err = ubifs_save_dirty_idx_lnums(c);
++	if (err)
++		goto out;
++
++	spin_lock(&c->space_lock);
++	/*
++	 * Although we have not finished committing yet, update size of the
++	 * committed index ('c->old_idx_sz') and zero out the index growth
++	 * budget. It is OK to do this now, because we've reserved all the
++	 * space which is needed to commit the index, and it is safe for the
++	 * budgeting subsystem to assume the index is already committed,
++	 * even though it is not.
++	 */
++	ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c));
++	c->old_idx_sz = c->calc_idx_sz;
++	c->budg_uncommitted_idx = 0;
++	c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
++	spin_unlock(&c->space_lock);
++	mutex_unlock(&c->tnc_mutex);
++
++	dbg_cmt("number of index LEBs %d", c->lst.idx_lebs);
++	dbg_cmt("size of index %llu", c->calc_idx_sz);
++	return err;
++
++out_free:
++	free_idx_lebs(c);
++out:
++	mutex_unlock(&c->tnc_mutex);
++	return err;
++}
++
++/**
++ * write_index - write index nodes.
++ * @c: UBIFS file-system description object
++ *
++ * This function writes the index nodes whose positions were laid out in the
++ * layout_in_empty_space function.
++ */
++static int write_index(struct ubifs_info *c)
++{
++	struct ubifs_idx_node *idx;
++	struct ubifs_znode *znode, *cnext;
++	int i, lnum, offs, len, next_len, buf_len, buf_offs, used;
++	int avail, wlen, err, lnum_pos = 0;
++
++	cnext = c->enext;
++	if (!cnext)
++		return 0;
++
++	/*
++	 * Always write index nodes to the index head so that index nodes and
++	 * other types of nodes are never mixed in the same erase block.
++	 */
++	lnum = c->ihead_lnum;
++	buf_offs = c->ihead_offs;
++
++	/* Allocate commit buffer */
++	buf_len = ALIGN(c->max_idx_node_sz, c->min_io_size);
++	used = 0;
++	avail = buf_len;
++
++	/* Ensure there is enough room for first write */
++	next_len = ubifs_idx_node_sz(c, cnext->child_cnt);
++	if (buf_offs + next_len > c->leb_size) {
++		err = ubifs_update_one_lp(c, lnum, LPROPS_NC, 0, 0,
++					  LPROPS_TAKEN);
++		if (err)
++			return err;
++		lnum = -1;
++	}
++
++	while (1) {
++		cond_resched();
++
++		znode = cnext;
++		idx = c->cbuf + used;
++
++		/* Make index node */
++		idx->ch.node_type = UBIFS_IDX_NODE;
++		idx->child_cnt = cpu_to_le16(znode->child_cnt);
++		idx->level = cpu_to_le16(znode->level);
++		for (i = 0; i < znode->child_cnt; i++) {
++			struct ubifs_branch *br = ubifs_idx_branch(c, idx, i);
++			struct ubifs_zbranch *zbr = &znode->zbranch[i];
++
++			key_write_idx(c, &zbr->key, &br->key);
++			br->lnum = cpu_to_le32(zbr->lnum);
++			br->offs = cpu_to_le32(zbr->offs);
++			br->len = cpu_to_le32(zbr->len);
++			if (!zbr->lnum || !zbr->len) {
++				ubifs_err("bad ref in znode");
++				dbg_dump_znode(c, znode);
++				if (zbr->znode)
++					dbg_dump_znode(c, zbr->znode);
++			}
++		}
++		len = ubifs_idx_node_sz(c, znode->child_cnt);
++		ubifs_prepare_node(c, idx, len, 0);
++
++		/* Determine the index node position */
++		if (lnum == -1) {
++			lnum = c->ilebs[lnum_pos++];
++			buf_offs = 0;
++			used = 0;
++			avail = buf_len;
++		}
++		offs = buf_offs + used;
++
++#ifdef CONFIG_UBIFS_FS_DEBUG
++		if (lnum != znode->lnum || offs != znode->offs ||
++		    len != znode->len) {
++			ubifs_err("inconsistent znode posn");
++			return -EINVAL;
++		}
++#endif
++
++		/* Grab some stuff from znode while we still can */
++		cnext = znode->cnext;
++
++		ubifs_assert(ubifs_zn_dirty(znode));
++		ubifs_assert(test_bit(COW_ZNODE, &znode->flags));
++
++		/*
++		 * It is important that other threads should see %DIRTY_ZNODE
++		 * flag cleared before %COW_ZNODE. Specifically, it matters in
++		 * the 'dirty_cow_znode()' function. This is the reason for the
++		 * first barrier. Also, we want the bit changes to be seen by
++		 * other threads ASAP, to avoid unnecessary copying, which is
++		 * the reason for the second barrier.
++		 */
++		clear_bit(DIRTY_ZNODE, &znode->flags);
++		smp_mb__before_clear_bit();
++		clear_bit(COW_ZNODE, &znode->flags);
++		smp_mb__after_clear_bit();
++
++		/* Do not access znode from this point on */
++
++		/* Update buffer positions */
++		wlen = used + len;
++		used += ALIGN(len, 8);
++		avail -= ALIGN(len, 8);
++
++		/*
++		 * Calculate the next index node length to see if there is
++		 * enough room for it
++		 */
++		if (cnext == c->cnext)
++			next_len = 0;
++		else
++			next_len = ubifs_idx_node_sz(c, cnext->child_cnt);
++
++		if (c->min_io_size == 1) {
++			/*
++			 * Write the prepared index node immediately if there is
++			 * no minimum IO size
++			 */
++			err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs,
++					      wlen, UBI_SHORTTERM);
++			if (err)
++				return err;
++			buf_offs += ALIGN(wlen, 8);
++			if (next_len) {
++				used = 0;
++				avail = buf_len;
++				if (buf_offs + next_len > c->leb_size) {
++					err = ubifs_update_one_lp(c, lnum,
++						LPROPS_NC, 0, 0, LPROPS_TAKEN);
++					if (err)
++						return err;
++					lnum = -1;
++				}
++				continue;
++			}
++		} else {
++			int blen, nxt_offs = buf_offs + used + next_len;
++
++			if (next_len && nxt_offs <= c->leb_size) {
++				if (avail > 0)
++					continue;
++				else
++					blen = buf_len;
++			} else {
++				wlen = ALIGN(wlen, 8);
++				blen = ALIGN(wlen, c->min_io_size);
++				ubifs_pad(c, c->cbuf + wlen, blen - wlen);
++			}
++			/*
++			 * The buffer is full or there are no more znodes
++			 * to do
++			 */
++			err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs,
++					      blen, UBI_SHORTTERM);
++			if (err)
++				return err;
++			buf_offs += blen;
++			if (next_len) {
++				if (nxt_offs > c->leb_size) {
++					err = ubifs_update_one_lp(c, lnum,
++						LPROPS_NC, 0, 0, LPROPS_TAKEN);
++					if (err)
++						return err;
++					lnum = -1;
++				}
++				used -= blen;
++				if (used < 0)
++					used = 0;
++				avail = buf_len - used;
++				memmove(c->cbuf, c->cbuf + blen, used);
++				continue;
++			}
++		}
++		break;
++	}
++
++#ifdef CONFIG_UBIFS_FS_DEBUG
++	if (lnum != c->dbg->new_ihead_lnum ||
++	    buf_offs != c->dbg->new_ihead_offs) {
++		ubifs_err("inconsistent ihead");
++		return -EINVAL;
++	}
++#endif
++
++	c->ihead_lnum = lnum;
++	c->ihead_offs = buf_offs;
++
++	return 0;
++}
++
++/**
++ * free_obsolete_znodes - free obsolete znodes.
++ * @c: UBIFS file-system description object
++ *
++ * At commit end, obsolete znodes are freed.
++ */
++static void free_obsolete_znodes(struct ubifs_info *c)
++{
++	struct ubifs_znode *znode, *cnext;
++
++	cnext = c->cnext;
++	do {
++		znode = cnext;
++		cnext = znode->cnext; /* Read before znode may be freed */
++		if (test_bit(OBSOLETE_ZNODE, &znode->flags))
++			kfree(znode);
++		else {
++			znode->cnext = NULL;
++			atomic_long_inc(&c->clean_zn_cnt);
++			atomic_long_inc(&ubifs_clean_zn_cnt);
++		}
++	} while (cnext != c->cnext);
++}
++
++/**
++ * return_gap_lebs - return LEBs used by the in-gap commit method.
++ * @c: UBIFS file-system description object
++ *
++ * This function clears the "taken" flag for the LEBs which were used by the
++ * "commit in-the-gaps" method.
++ */
++static int return_gap_lebs(struct ubifs_info *c)
++{
++	int *p, err;
++
++	if (!c->gap_lebs)
++		return 0;
++
++	dbg_cmt("");
++	for (p = c->gap_lebs; *p != -1; p++) {
++		err = ubifs_change_one_lp(c, *p, LPROPS_NC, LPROPS_NC, 0,
++					  LPROPS_TAKEN, 0);
++		if (err)
++			return err; /* c->gap_lebs is left allocated here */
++	}
++
++	kfree(c->gap_lebs);
++	c->gap_lebs = NULL;
++	return 0;
++}
++
++/**
++ * ubifs_tnc_end_commit - update the TNC for commit end.
++ * @c: UBIFS file-system description object
++ *
++ * Write the dirty znodes.
++ */
++int ubifs_tnc_end_commit(struct ubifs_info *c)
++{
++	int err;
++
++	if (!c->cnext)
++		return 0;
++
++	err = return_gap_lebs(c);
++	if (err)
++		return err;
++
++	err = write_index(c);
++	if (err)
++		return err;
++
++	mutex_lock(&c->tnc_mutex);
++
++	dbg_cmt("TNC height is %d", c->zroot.znode->level + 1);
++
++	free_obsolete_znodes(c);
++
++	c->cnext = NULL;
++	kfree(c->ilebs);
++	c->ilebs = NULL;
++
++	mutex_unlock(&c->tnc_mutex);
++
++	return 0;
++}
+diff -Nurd linux-2.6.24/fs/ubifs/tnc_misc.c ubifs-v2.6.24/fs/ubifs/tnc_misc.c
+--- linux-2.6.24/fs/ubifs/tnc_misc.c 1970-01-01 02:00:00.000000000 +0200
++++ ubifs-v2.6.24/fs/ubifs/tnc_misc.c 2009-04-07 17:14:47.000000000 +0200
+@@ -0,0 +1,494 @@
++/*
++ * This file is part of UBIFS.
++ *
++ * Copyright (C) 2006-2008 Nokia Corporation.
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 as published by
++ * the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
++ * more details.
++ *
++ * You should have received a copy of the GNU General Public License along with
++ * this program; if not, write to the Free Software Foundation, Inc., 51
++ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ *
++ * Authors: Adrian Hunter
++ *          Artem Bityutskiy (Битюцкий Артём)
++ */
++
++/*
++ * This file contains miscellaneous TNC-related functions shared between
++ * different files. This file does not form any logically separate TNC
++ * sub-system. The file was created because there is a lot of TNC code and
++ * putting it all in one file would make that file too big and unreadable.
++ */
++
++#include "ubifs.h"
++
++/**
++ * ubifs_tnc_levelorder_next - next TNC tree element in levelorder traversal.
++ * @zr: root of the subtree to traverse
++ * @znode: previous znode
++ *
++ * This function implements levelorder TNC traversal. The LNC is ignored.
++ * Returns the next element or %NULL if @znode is already the last one.
++ */
++struct ubifs_znode *ubifs_tnc_levelorder_next(struct ubifs_znode *zr,
++					      struct ubifs_znode *znode)
++{
++	int level, iip, level_search = 0;
++	struct ubifs_znode *zn;
++
++	ubifs_assert(zr);
++
++	if (unlikely(!znode))
++		return zr;
++
++	if (unlikely(znode == zr)) {
++		if (znode->level == 0)
++			return NULL;
++		return ubifs_tnc_find_child(zr, 0);
++	}
++
++	level = znode->level;
++
++	iip = znode->iip;
++	while (1) {
++		ubifs_assert(znode->level <= zr->level);
++
++		/*
++		 * First walk up until there is a znode with next branch to
++		 * look at.
++		 */
++		while (znode->parent != zr && iip >= znode->parent->child_cnt) {
++			znode = znode->parent;
++			iip = znode->iip;
++		}
++
++		if (unlikely(znode->parent == zr &&
++			     iip >= znode->parent->child_cnt)) {
++			/* This level is done, switch to the lower one */
++			level -= 1;
++			if (level_search || level < 0)
++				/*
++				 * We were already looking for znode at lower
++				 * level ('level_search'). As we are here
++				 * again, it just does not exist. Or all levels
++				 * were finished ('level < 0').
++				 */
++				return NULL;
++
++			level_search = 1;
++			iip = -1;
++			znode = ubifs_tnc_find_child(zr, 0);
++			ubifs_assert(znode);
++		}
++
++		/* Switch to the next index */
++		zn = ubifs_tnc_find_child(znode->parent, iip + 1);
++		if (!zn) {
++			/* No more children to look at, we have to walk up */
++			iip = znode->parent->child_cnt;
++			continue;
++		}
++
++		/* Walk back down to the level we came from ('level') */
++		while (zn->level != level) {
++			znode = zn;
++			zn = ubifs_tnc_find_child(zn, 0);
++			if (!zn) {
++				/*
++				 * This path is not too deep so it does not
++				 * reach 'level'. Try next path.
++				 */
++				iip = znode->iip;
++				break;
++			}
++		}
++
++		if (zn) {
++			ubifs_assert(zn->level >= 0);
++			return zn;
++		}
++	}
++}
++
++/**
++ * ubifs_search_zbranch - search znode branch.
++ * @c: UBIFS file-system description object
++ * @znode: znode to search in
++ * @key: key to search for
++ * @n: znode branch slot number is returned here
++ *
++ * This is a helper function which searches for key @key in @znode using
++ * binary search. The result of the search may be:
++ *   o exact match, then %1 is returned, and the slot number of the branch is
++ *     stored in @n;
++ *   o no exact match, then %0 is returned and the slot number of the left
++ *     closest branch is returned in @n; if all keys in this znode are
++ *     greater than @key, then %-1 is returned in @n.
++ */
++int ubifs_search_zbranch(const struct ubifs_info *c,
++			 const struct ubifs_znode *znode,
++			 const union ubifs_key *key, int *n)
++{
++	int beg = 0, end = znode->child_cnt, uninitialized_var(mid);
++	int uninitialized_var(cmp);
++	const struct ubifs_zbranch *zbr = &znode->zbranch[0];
++
++	ubifs_assert(end > beg);
++
++	while (end > beg) {
++		mid = (beg + end) >> 1;
++		cmp = keys_cmp(c, key, &zbr[mid].key);
++		if (cmp > 0)
++			beg = mid + 1;
++		else if (cmp < 0)
++			end = mid;
++		else {
++			*n = mid;
++			return 1;
++		}
++	}
++
++	*n = end - 1;
++
++	/* The insert point is after *n */
++	ubifs_assert(*n >= -1 && *n < znode->child_cnt);
++	if (*n == -1)
++		ubifs_assert(keys_cmp(c, key, &zbr[0].key) < 0);
++	else
++		ubifs_assert(keys_cmp(c, key, &zbr[*n].key) > 0);
++	if (*n + 1 < znode->child_cnt)
++		ubifs_assert(keys_cmp(c, key, &zbr[*n + 1].key) < 0);
++
++	return 0;
++}
++
++/**
++ * ubifs_tnc_postorder_first - find first znode to do postorder tree traversal.
++ * @znode: znode to start at (root of the sub-tree to traverse)
++ *
++ * Find the lowest leftmost znode in a subtree of the TNC tree. The LNC is
++ * ignored.
++ */
++struct ubifs_znode *ubifs_tnc_postorder_first(struct ubifs_znode *znode)
++{
++	if (unlikely(!znode))
++		return NULL;
++
++	while (znode->level > 0) {
++		struct ubifs_znode *child;
++
++		child = ubifs_tnc_find_child(znode, 0);
++		if (!child)
++			return znode; /* No child found below: start here */
++		znode = child;
++	}
++
++	return znode;
++}
++
++/**
++ * ubifs_tnc_postorder_next - next TNC tree element in postorder traversal.
++ * @znode: previous znode
++ *
++ * This function implements postorder TNC traversal. The LNC is ignored.
++ * Returns the next element or %NULL if @znode is already the last one.
++ */
++struct ubifs_znode *ubifs_tnc_postorder_next(struct ubifs_znode *znode)
++{
++	struct ubifs_znode *zn;
++
++	ubifs_assert(znode);
++	if (unlikely(!znode->parent))
++		return NULL;
++
++	/* Switch to the next index in the parent */
++	zn = ubifs_tnc_find_child(znode->parent, znode->iip + 1);
++	if (!zn)
++		/* This is in fact the last child, return parent */
++		return znode->parent;
++
++	/* Go to the first znode in this new subtree */
++	return ubifs_tnc_postorder_first(zn);
++}
++
++/**
++ * ubifs_destroy_tnc_subtree - destroy all znodes connected to a subtree.
++ * @znode: znode defining subtree to destroy
++ *
++ * This function destroys a subtree of the TNC tree. Returns the number of
++ * clean znodes that were freed.
++ */
++long ubifs_destroy_tnc_subtree(struct ubifs_znode *znode)
++{
++	struct ubifs_znode *zn = ubifs_tnc_postorder_first(znode);
++	long clean_freed = 0;
++	int n;
++
++	ubifs_assert(zn);
++	while (1) {
++		for (n = 0; n < zn->child_cnt; n++) {
++			if (!zn->zbranch[n].znode)
++				continue;
++
++			if (zn->level > 0 &&
++			    !ubifs_zn_dirty(zn->zbranch[n].znode))
++				clean_freed += 1;
++
++			cond_resched();
++			kfree(zn->zbranch[n].znode);
++		}
++
++		if (zn == znode) { /* Back at the subtree root: free it too */
++			if (!ubifs_zn_dirty(zn))
++				clean_freed += 1;
++			kfree(zn);
++			return clean_freed;
++		}
++
++		zn = ubifs_tnc_postorder_next(zn);
++	}
++}
++
++/**
++ * read_znode - read an indexing node from flash and fill znode.
++ * @c: UBIFS file-system description object
++ * @lnum: LEB of the indexing node to read
++ * @offs: node offset
++ * @len: node length
++ * @znode: znode to read to
++ *
++ * This function reads an indexing node from the flash media and fills znode
++ * with the read data. Returns zero in case of success and a negative error
++ * code in case of failure. The read indexing node is validated and if anything
++ * is wrong with it, this function prints complaint messages and returns
++ * %-EINVAL.
++ */
++static int read_znode(struct ubifs_info *c, int lnum, int offs, int len,
++		      struct ubifs_znode *znode)
++{
++	int i, err, type, cmp;
++	struct ubifs_idx_node *idx;
++
++	idx = kmalloc(c->max_idx_node_sz, GFP_NOFS);
++	if (!idx)
++		return -ENOMEM;
++
++	err = ubifs_read_node(c, idx, UBIFS_IDX_NODE, len, lnum, offs);
++	if (err < 0) {
++		kfree(idx);
++		return err;
++	}
++
++	znode->child_cnt = le16_to_cpu(idx->child_cnt);
++	znode->level = le16_to_cpu(idx->level);
++
++	dbg_tnc("LEB %d:%d, level %d, %d branch",
++		lnum, offs, znode->level, znode->child_cnt);
++
++	if (znode->child_cnt > c->fanout || znode->level > UBIFS_MAX_LEVELS) {
++		dbg_err("current fanout %d, branch count %d",
++			c->fanout, znode->child_cnt);
++		dbg_err("max levels %d, znode level %d",
++			UBIFS_MAX_LEVELS, znode->level);
++		err = 1;
++		goto out_dump;
++	}
++
++	for (i = 0; i < znode->child_cnt; i++) {
++		const struct ubifs_branch *br = ubifs_idx_branch(c, idx, i);
++		struct ubifs_zbranch *zbr = &znode->zbranch[i];
++
++		key_read(c, &br->key, &zbr->key);
++		zbr->lnum = le32_to_cpu(br->lnum);
++		zbr->offs = le32_to_cpu(br->offs);
++		zbr->len = le32_to_cpu(br->len);
++		zbr->znode = NULL;
++
++		/* Validate branch */
++
++		if (zbr->lnum < c->main_first ||
++		    zbr->lnum >= c->leb_cnt || zbr->offs < 0 ||
++		    zbr->offs + zbr->len > c->leb_size || zbr->offs & 7) {
++			dbg_err("bad branch %d", i);
++			err = 2;
++			goto out_dump;
++		}
++
++		switch (key_type(c, &zbr->key)) {
++		case UBIFS_INO_KEY:
++		case UBIFS_DATA_KEY:
++		case UBIFS_DENT_KEY:
++		case UBIFS_XENT_KEY:
++			break;
++		default:
++			dbg_msg("bad key type at slot %d: %s", i,
++				DBGKEY(&zbr->key));
++			err = 3;
++			goto out_dump;
++		}
++
++		if (znode->level) /* Length checks below are for level 0 only */
++			continue;
++
++		type = key_type(c, &zbr->key);
++		if (c->ranges[type].max_len == 0) {
++			if (zbr->len != c->ranges[type].len) {
++				dbg_err("bad target node (type %d) length (%d)",
++					type, zbr->len);
++				dbg_err("have to be %d", c->ranges[type].len);
++				err = 4;
++				goto out_dump;
++			}
++		} else if (zbr->len < c->ranges[type].min_len ||
++			   zbr->len > c->ranges[type].max_len) {
++			dbg_err("bad target node (type %d) length (%d)",
++				type, zbr->len);
++			dbg_err("have to be in range of %d-%d",
++				c->ranges[type].min_len,
++				c->ranges[type].max_len);
++			err = 5;
++			goto out_dump;
++		}
++	}
++
++	/*
++	 * Ensure that the next key is greater than or equivalent to the
++	 * previous one.
++	 */
++	for (i = 0; i < znode->child_cnt - 1; i++) {
++		const union ubifs_key *key1, *key2;
++
++		key1 = &znode->zbranch[i].key;
++		key2 = &znode->zbranch[i + 1].key;
++
++		cmp = keys_cmp(c, key1, key2);
++		if (cmp > 0) {
++			dbg_err("bad key order (keys %d and %d)", i, i + 1);
++			err = 6;
++			goto out_dump;
++		} else if (cmp == 0 && !is_hash_key(c, key1)) {
++			/* These can only be keys with colliding hash */
++			dbg_err("keys %d and %d are not hashed but equivalent",
++				i, i + 1);
++			err = 7;
++			goto out_dump;
++		}
++	}
++
++	kfree(idx);
++	return 0;
++
++out_dump:
++	ubifs_err("bad indexing node at LEB %d:%d, error %d", lnum, offs, err);
++	dbg_dump_node(c, idx);
++	kfree(idx);
++	return -EINVAL; /* 'err' (1-7) is only printed above, not returned */
++}
++
++/**
++ * ubifs_load_znode - load znode to TNC cache.
++ * @c: UBIFS file-system description object
++ * @zbr: znode branch
++ * @parent: znode's parent
++ * @iip: index in parent
++ *
++ * This function loads znode pointed to by @zbr into the TNC cache and
++ * returns pointer to it in case of success and a negative error code in case
++ * of failure.
++ */
++struct ubifs_znode *ubifs_load_znode(struct ubifs_info *c,
++				     struct ubifs_zbranch *zbr,
++				     struct ubifs_znode *parent, int iip)
++{
++	int err;
++	struct ubifs_znode *znode;
++
++	ubifs_assert(!zbr->znode);
++	/*
++	 * A slab cache is not presently used for znodes because the znode size
++	 * depends on the fanout which is stored in the superblock.
++	 */
++	znode = kzalloc(c->max_znode_sz, GFP_NOFS);
++	if (!znode)
++		return ERR_PTR(-ENOMEM);
++
++	err = read_znode(c, zbr->lnum, zbr->offs, zbr->len, znode);
++	if (err)
++		goto out;
++
++	atomic_long_inc(&c->clean_zn_cnt);
++
++	/*
++	 * Increment the global clean znode counter as well. It is OK that
++	 * global and per-FS clean znode counters may be inconsistent for some
++	 * short time (because we might be preempted at this point), the global
++	 * one is only used in shrinker.
++	 */
++	atomic_long_inc(&ubifs_clean_zn_cnt);
++
++	zbr->znode = znode;
++	znode->parent = parent;
++	znode->time = get_seconds();
++	znode->iip = iip;
++
++	return znode;
++
++out:
++	kfree(znode);
++	return ERR_PTR(err); /* Callers must check with IS_ERR() */
++}
++
++/**
++ * ubifs_tnc_read_node - read a leaf node from the flash media.
++ * @c: UBIFS file-system description object
++ * @zbr: key and position of the node
++ * @node: node is returned here
++ *
++ * This function reads a node defined by @zbr from the flash media. Returns
++ * zero in case of success or a negative error code in case of
++ * failure.
++ */
++int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr,
++			void *node)
++{
++	union ubifs_key key1, *key = &zbr->key;
++	int err, type = key_type(c, key);
++	struct ubifs_wbuf *wbuf;
++
++	/*
++	 * 'zbr' has to point to on-flash node. The node may sit in a bud and
++	 * may even be in a write buffer, so we have to take care about this.
++	 */
++	wbuf = ubifs_get_wbuf(c, zbr->lnum);
++	if (wbuf)
++		err = ubifs_read_node_wbuf(wbuf, node, type, zbr->len,
++					   zbr->lnum, zbr->offs);
++	else
++		err = ubifs_read_node(c, node, type, zbr->len, zbr->lnum,
++				      zbr->offs);
++
++	if (err) {
++		dbg_tnc("key %s", DBGKEY(key));
++		return err;
++	}
++
++	/* Make sure the key of the read node is correct */
++	key_read(c, node + UBIFS_KEY_OFFSET, &key1);
++	if (!keys_eq(c, key, &key1)) {
++		ubifs_err("bad key in node at LEB %d:%d",
++			  zbr->lnum, zbr->offs);
++		dbg_tnc("looked for key %s found node's key %s",
++			DBGKEY(key), DBGKEY1(&key1));
++		dbg_dump_node(c, node);
++		return -EINVAL;
++	}
++
++	return 0;
++}
+diff -Nurd linux-2.6.24/fs/ubifs/ubifs-media.h ubifs-v2.6.24/fs/ubifs/ubifs-media.h
+--- linux-2.6.24/fs/ubifs/ubifs-media.h 1970-01-01 02:00:00.000000000 +0200
++++ ubifs-v2.6.24/fs/ubifs/ubifs-media.h 2009-04-07 17:14:47.000000000 +0200
+@@ -0,0 +1,775 @@
++/*
++ * This file is part of UBIFS.
++ *
++ * Copyright (C) 2006-2008 Nokia Corporation.
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 as published by
++ * the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
++ * more details.
++ *
++ * You should have received a copy of the GNU General Public License along with
++ * this program; if not, write to the Free Software Foundation, Inc., 51
++ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ *
++ * Authors: Artem Bityutskiy (Битюцкий Артём)
++ *          Adrian Hunter
++ */
++
++/*
++ * This file describes UBIFS on-flash format and contains definitions of all the
++ * relevant data structures and constants.
++ *
++ * All UBIFS on-flash objects are stored in the form of nodes.
All nodes start ++ * with the UBIFS node magic number and have the same common header. Nodes ++ * always sit at 8-byte aligned positions on the media and node header sizes are ++ * also 8-byte aligned (except for the indexing node and the padding node). ++ */ ++ ++#ifndef __UBIFS_MEDIA_H__ ++#define __UBIFS_MEDIA_H__ ++ ++/* UBIFS node magic number (must not have the padding byte first or last) */ ++#define UBIFS_NODE_MAGIC 0x06101831 ++ ++/* ++ * UBIFS on-flash format version. This version is increased when the on-flash ++ * format is changing. If this happens, UBIFS will support older versions as ++ * well. But older UBIFS code will not support newer formats. Format changes ++ * will be rare and only when absolutely necessary, e.g. to fix a bug or to add ++ * a new feature. ++ * ++ * UBIFS went into mainline kernel with format version 4. The older formats ++ * were development formats. ++ */ ++#define UBIFS_FORMAT_VERSION 4 ++ ++/* ++ * Read-only compatibility version. If the UBIFS format is changed, older UBIFS ++ * implementations will not be able to mount newer formats in read-write mode. ++ * However, depending on the change, it may be possible to mount newer formats ++ * in R/O mode. This is indicated by the R/O compatibility version which is ++ * stored in the super-block. ++ * ++ * This is needed to support boot-loaders which only need R/O mounting. With ++ * this flag it is possible to do UBIFS format changes without a need to update ++ * boot-loaders. ++ */ ++#define UBIFS_RO_COMPAT_VERSION 0 ++ ++/* Minimum logical eraseblock size in bytes */ ++#define UBIFS_MIN_LEB_SZ (15*1024) ++ ++/* Initial CRC32 value used when calculating CRC checksums */ ++#define UBIFS_CRC32_INIT 0xFFFFFFFFU ++ ++/* ++ * UBIFS does not try to compress data if its length is less than the below ++ * constant. 
++ */ ++#define UBIFS_MIN_COMPR_LEN 128 ++ ++/* ++ * If compressed data length is less than %UBIFS_MIN_COMPRESS_DIFF bytes ++ * shorter than uncompressed data length, UBIFS prefers to leave this data ++ * node uncompressed, because it'll be read faster. ++ */ ++#define UBIFS_MIN_COMPRESS_DIFF 64 ++ ++/* Root inode number */ ++#define UBIFS_ROOT_INO 1 ++ ++/* Lowest inode number used for regular inodes (not UBIFS-only internal ones) */ ++#define UBIFS_FIRST_INO 64 ++ ++/* ++ * Maximum file name and extended attribute length (must be a multiple of 8, ++ * minus 1). ++ */ ++#define UBIFS_MAX_NLEN 255 ++ ++/* Maximum number of data journal heads */ ++#define UBIFS_MAX_JHEADS 1 ++ ++/* ++ * Size of UBIFS data block. Note, UBIFS is not a block oriented file-system, ++ * which means that it does not treat the underlying media as consisting of ++ * blocks like in case of hard drives. Do not be confused. UBIFS block is just ++ * the maximum amount of data which one data node can have or which can be ++ * attached to an inode node. 
++ */ ++#define UBIFS_BLOCK_SIZE 4096 ++#define UBIFS_BLOCK_SHIFT 12 ++ ++/* UBIFS padding byte pattern (must not be first or last byte of node magic) */ ++#define UBIFS_PADDING_BYTE 0xCE ++ ++/* Maximum possible key length */ ++#define UBIFS_MAX_KEY_LEN 16 ++ ++/* Key length ("simple" format) */ ++#define UBIFS_SK_LEN 8 ++ ++/* Minimum index tree fanout */ ++#define UBIFS_MIN_FANOUT 3 ++ ++/* Maximum number of levels in UBIFS indexing B-tree */ ++#define UBIFS_MAX_LEVELS 512 ++ ++/* Maximum amount of data attached to an inode in bytes */ ++#define UBIFS_MAX_INO_DATA UBIFS_BLOCK_SIZE ++ ++/* LEB Properties Tree fanout (must be power of 2) and fanout shift */ ++#define UBIFS_LPT_FANOUT 4 ++#define UBIFS_LPT_FANOUT_SHIFT 2 ++ ++/* LEB Properties Tree bit field sizes */ ++#define UBIFS_LPT_CRC_BITS 16 ++#define UBIFS_LPT_CRC_BYTES 2 ++#define UBIFS_LPT_TYPE_BITS 4 ++ ++/* The key is always at the same position in all keyed nodes */ ++#define UBIFS_KEY_OFFSET offsetof(struct ubifs_ino_node, key) ++ ++/* ++ * LEB Properties Tree node types. ++ * ++ * UBIFS_LPT_PNODE: LPT leaf node (contains LEB properties) ++ * UBIFS_LPT_NNODE: LPT internal node ++ * UBIFS_LPT_LTAB: LPT's own lprops table ++ * UBIFS_LPT_LSAVE: LPT's save table (big model only) ++ * UBIFS_LPT_NODE_CNT: count of LPT node types ++ * UBIFS_LPT_NOT_A_NODE: all ones (15 for 4 bits) is never a valid node type ++ */ ++enum { ++ UBIFS_LPT_PNODE, ++ UBIFS_LPT_NNODE, ++ UBIFS_LPT_LTAB, ++ UBIFS_LPT_LSAVE, ++ UBIFS_LPT_NODE_CNT, ++ UBIFS_LPT_NOT_A_NODE = (1 << UBIFS_LPT_TYPE_BITS) - 1, ++}; ++ ++/* ++ * UBIFS inode types. 
++ * ++ * UBIFS_ITYPE_REG: regular file ++ * UBIFS_ITYPE_DIR: directory ++ * UBIFS_ITYPE_LNK: soft link ++ * UBIFS_ITYPE_BLK: block device node ++ * UBIFS_ITYPE_CHR: character device node ++ * UBIFS_ITYPE_FIFO: fifo ++ * UBIFS_ITYPE_SOCK: socket ++ * UBIFS_ITYPES_CNT: count of supported file types ++ */ ++enum { ++ UBIFS_ITYPE_REG, ++ UBIFS_ITYPE_DIR, ++ UBIFS_ITYPE_LNK, ++ UBIFS_ITYPE_BLK, ++ UBIFS_ITYPE_CHR, ++ UBIFS_ITYPE_FIFO, ++ UBIFS_ITYPE_SOCK, ++ UBIFS_ITYPES_CNT, ++}; ++ ++/* ++ * Supported key hash functions. ++ * ++ * UBIFS_KEY_HASH_R5: R5 hash ++ * UBIFS_KEY_HASH_TEST: test hash which just returns first 4 bytes of the name ++ */ ++enum { ++ UBIFS_KEY_HASH_R5, ++ UBIFS_KEY_HASH_TEST, ++}; ++ ++/* ++ * Supported key formats. ++ * ++ * UBIFS_SIMPLE_KEY_FMT: simple key format ++ */ ++enum { ++ UBIFS_SIMPLE_KEY_FMT, ++}; ++ ++/* ++ * The simple key format uses 29 bits for storing UBIFS block number and hash ++ * value. ++ */ ++#define UBIFS_S_KEY_BLOCK_BITS 29 ++#define UBIFS_S_KEY_BLOCK_MASK 0x1FFFFFFF ++#define UBIFS_S_KEY_HASH_BITS UBIFS_S_KEY_BLOCK_BITS ++#define UBIFS_S_KEY_HASH_MASK UBIFS_S_KEY_BLOCK_MASK ++ ++/* ++ * Key types. 
++ * ++ * UBIFS_INO_KEY: inode node key ++ * UBIFS_DATA_KEY: data node key ++ * UBIFS_DENT_KEY: directory entry node key ++ * UBIFS_XENT_KEY: extended attribute entry key ++ * UBIFS_KEY_TYPES_CNT: number of supported key types ++ */ ++enum { ++ UBIFS_INO_KEY, ++ UBIFS_DATA_KEY, ++ UBIFS_DENT_KEY, ++ UBIFS_XENT_KEY, ++ UBIFS_KEY_TYPES_CNT, ++}; ++ ++/* Count of LEBs reserved for the superblock area */ ++#define UBIFS_SB_LEBS 1 ++/* Count of LEBs reserved for the master area */ ++#define UBIFS_MST_LEBS 2 ++ ++/* First LEB of the superblock area */ ++#define UBIFS_SB_LNUM 0 ++/* First LEB of the master area */ ++#define UBIFS_MST_LNUM (UBIFS_SB_LNUM + UBIFS_SB_LEBS) ++/* First LEB of the log area */ ++#define UBIFS_LOG_LNUM (UBIFS_MST_LNUM + UBIFS_MST_LEBS) ++ ++/* ++ * The below constants define the absolute minimum values for various UBIFS ++ * media areas. Many of them actually depend of flash geometry and the FS ++ * configuration (number of journal heads, orphan LEBs, etc). This means that ++ * the smallest volume size which can be used for UBIFS cannot be pre-defined ++ * by these constants. The file-system that meets the below limitation will not ++ * necessarily mount. UBIFS does run-time calculations and validates the FS ++ * size. ++ */ ++ ++/* Minimum number of logical eraseblocks in the log */ ++#define UBIFS_MIN_LOG_LEBS 2 ++/* Minimum number of bud logical eraseblocks (one for each head) */ ++#define UBIFS_MIN_BUD_LEBS 3 ++/* Minimum number of journal logical eraseblocks */ ++#define UBIFS_MIN_JNL_LEBS (UBIFS_MIN_LOG_LEBS + UBIFS_MIN_BUD_LEBS) ++/* Minimum number of LPT area logical eraseblocks */ ++#define UBIFS_MIN_LPT_LEBS 2 ++/* Minimum number of orphan area logical eraseblocks */ ++#define UBIFS_MIN_ORPH_LEBS 1 ++/* ++ * Minimum number of main area logical eraseblocks (buds, 3 for the index, 1 ++ * for GC, 1 for deletions, and at least 1 for committed data). 
++ */ ++#define UBIFS_MIN_MAIN_LEBS (UBIFS_MIN_BUD_LEBS + 6) ++ ++/* Minimum number of logical eraseblocks */ ++#define UBIFS_MIN_LEB_CNT (UBIFS_SB_LEBS + UBIFS_MST_LEBS + \ ++ UBIFS_MIN_LOG_LEBS + UBIFS_MIN_LPT_LEBS + \ ++ UBIFS_MIN_ORPH_LEBS + UBIFS_MIN_MAIN_LEBS) ++ ++/* Node sizes (N.B. these are guaranteed to be multiples of 8) */ ++#define UBIFS_CH_SZ sizeof(struct ubifs_ch) ++#define UBIFS_INO_NODE_SZ sizeof(struct ubifs_ino_node) ++#define UBIFS_DATA_NODE_SZ sizeof(struct ubifs_data_node) ++#define UBIFS_DENT_NODE_SZ sizeof(struct ubifs_dent_node) ++#define UBIFS_TRUN_NODE_SZ sizeof(struct ubifs_trun_node) ++#define UBIFS_PAD_NODE_SZ sizeof(struct ubifs_pad_node) ++#define UBIFS_SB_NODE_SZ sizeof(struct ubifs_sb_node) ++#define UBIFS_MST_NODE_SZ sizeof(struct ubifs_mst_node) ++#define UBIFS_REF_NODE_SZ sizeof(struct ubifs_ref_node) ++#define UBIFS_IDX_NODE_SZ sizeof(struct ubifs_idx_node) ++#define UBIFS_CS_NODE_SZ sizeof(struct ubifs_cs_node) ++#define UBIFS_ORPH_NODE_SZ sizeof(struct ubifs_orph_node) ++/* Extended attribute entry nodes are identical to directory entry nodes */ ++#define UBIFS_XENT_NODE_SZ UBIFS_DENT_NODE_SZ ++/* Only this does not have to be multiple of 8 bytes */ ++#define UBIFS_BRANCH_SZ sizeof(struct ubifs_branch) ++ ++/* Maximum node sizes (N.B. these are guaranteed to be multiples of 8) */ ++#define UBIFS_MAX_DATA_NODE_SZ (UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE) ++#define UBIFS_MAX_INO_NODE_SZ (UBIFS_INO_NODE_SZ + UBIFS_MAX_INO_DATA) ++#define UBIFS_MAX_DENT_NODE_SZ (UBIFS_DENT_NODE_SZ + UBIFS_MAX_NLEN + 1) ++#define UBIFS_MAX_XENT_NODE_SZ UBIFS_MAX_DENT_NODE_SZ ++ ++/* The largest UBIFS node */ ++#define UBIFS_MAX_NODE_SZ UBIFS_MAX_INO_NODE_SZ ++ ++/* ++ * On-flash inode flags. 
++ * ++ * UBIFS_COMPR_FL: use compression for this inode ++ * UBIFS_SYNC_FL: I/O on this inode has to be synchronous ++ * UBIFS_IMMUTABLE_FL: inode is immutable ++ * UBIFS_APPEND_FL: writes to the inode may only append data ++ * UBIFS_DIRSYNC_FL: I/O on this directory inode has to be synchronous ++ * UBIFS_XATTR_FL: this inode is the inode for an extended attribute value ++ * ++ * Note, these are on-flash flags which correspond to ioctl flags ++ * (@FS_COMPR_FL, etc). They have the same values now, but generally, do not ++ * have to be the same. ++ */ ++enum { ++ UBIFS_COMPR_FL = 0x01, ++ UBIFS_SYNC_FL = 0x02, ++ UBIFS_IMMUTABLE_FL = 0x04, ++ UBIFS_APPEND_FL = 0x08, ++ UBIFS_DIRSYNC_FL = 0x10, ++ UBIFS_XATTR_FL = 0x20, ++}; ++ ++/* Inode flag bits used by UBIFS */ ++#define UBIFS_FL_MASK 0x0000001F ++ ++/* ++ * UBIFS compression types. ++ * ++ * UBIFS_COMPR_NONE: no compression ++ * UBIFS_COMPR_LZO: LZO compression ++ * UBIFS_COMPR_ZLIB: ZLIB compression ++ * UBIFS_COMPR_TYPES_CNT: count of supported compression types ++ */ ++enum { ++ UBIFS_COMPR_NONE, ++ UBIFS_COMPR_LZO, ++ UBIFS_COMPR_ZLIB, ++ UBIFS_COMPR_TYPES_CNT, ++}; ++ ++/* ++ * UBIFS node types. ++ * ++ * UBIFS_INO_NODE: inode node ++ * UBIFS_DATA_NODE: data node ++ * UBIFS_DENT_NODE: directory entry node ++ * UBIFS_XENT_NODE: extended attribute node ++ * UBIFS_TRUN_NODE: truncation node ++ * UBIFS_PAD_NODE: padding node ++ * UBIFS_SB_NODE: superblock node ++ * UBIFS_MST_NODE: master node ++ * UBIFS_REF_NODE: LEB reference node ++ * UBIFS_IDX_NODE: index node ++ * UBIFS_CS_NODE: commit start node ++ * UBIFS_ORPH_NODE: orphan node ++ * UBIFS_NODE_TYPES_CNT: count of supported node types ++ * ++ * Note, we index arrays by these numbers, so keep them low and contiguous. ++ * Node type constants for inodes, direntries and so on have to be the same as ++ * corresponding key type constants. 
++ */ ++enum { ++ UBIFS_INO_NODE, ++ UBIFS_DATA_NODE, ++ UBIFS_DENT_NODE, ++ UBIFS_XENT_NODE, ++ UBIFS_TRUN_NODE, ++ UBIFS_PAD_NODE, ++ UBIFS_SB_NODE, ++ UBIFS_MST_NODE, ++ UBIFS_REF_NODE, ++ UBIFS_IDX_NODE, ++ UBIFS_CS_NODE, ++ UBIFS_ORPH_NODE, ++ UBIFS_NODE_TYPES_CNT, ++}; ++ ++/* ++ * Master node flags. ++ * ++ * UBIFS_MST_DIRTY: rebooted uncleanly - master node is dirty ++ * UBIFS_MST_NO_ORPHS: no orphan inodes present ++ * UBIFS_MST_RCVRY: written by recovery ++ */ ++enum { ++ UBIFS_MST_DIRTY = 1, ++ UBIFS_MST_NO_ORPHS = 2, ++ UBIFS_MST_RCVRY = 4, ++}; ++ ++/* ++ * Node group type (used by recovery to recover whole group or none). ++ * ++ * UBIFS_NO_NODE_GROUP: this node is not part of a group ++ * UBIFS_IN_NODE_GROUP: this node is a part of a group ++ * UBIFS_LAST_OF_NODE_GROUP: this node is the last in a group ++ */ ++enum { ++ UBIFS_NO_NODE_GROUP = 0, ++ UBIFS_IN_NODE_GROUP, ++ UBIFS_LAST_OF_NODE_GROUP, ++}; ++ ++/* ++ * Superblock flags. ++ * ++ * UBIFS_FLG_BIGLPT: if "big" LPT model is used if set ++ */ ++enum { ++ UBIFS_FLG_BIGLPT = 0x02, ++}; ++ ++/** ++ * struct ubifs_ch - common header node. ++ * @magic: UBIFS node magic number (%UBIFS_NODE_MAGIC) ++ * @crc: CRC-32 checksum of the node header ++ * @sqnum: sequence number ++ * @len: full node length ++ * @node_type: node type ++ * @group_type: node group type ++ * @padding: reserved for future, zeroes ++ * ++ * Every UBIFS node starts with this common part. If the node has a key, the ++ * key always goes next. ++ */ ++struct ubifs_ch { ++ __le32 magic; ++ __le32 crc; ++ __le64 sqnum; ++ __le32 len; ++ __u8 node_type; ++ __u8 group_type; ++ __u8 padding[2]; ++} __attribute__ ((packed)); ++ ++/** ++ * union ubifs_dev_desc - device node descriptor. ++ * @new: new type device descriptor ++ * @huge: huge type device descriptor ++ * ++ * This data structure describes major/minor numbers of a device node. In an ++ * inode is a device node then its data contains an object of this type. 
UBIFS ++ * uses standard Linux "new" and "huge" device node encodings. ++ */ ++union ubifs_dev_desc { ++ __le32 new; ++ __le64 huge; ++} __attribute__ ((packed)); ++ ++/** ++ * struct ubifs_ino_node - inode node. ++ * @ch: common header ++ * @key: node key ++ * @creat_sqnum: sequence number at time of creation ++ * @size: inode size in bytes (amount of uncompressed data) ++ * @atime_sec: access time seconds ++ * @ctime_sec: creation time seconds ++ * @mtime_sec: modification time seconds ++ * @atime_nsec: access time nanoseconds ++ * @ctime_nsec: creation time nanoseconds ++ * @mtime_nsec: modification time nanoseconds ++ * @nlink: number of hard links ++ * @uid: owner ID ++ * @gid: group ID ++ * @mode: access flags ++ * @flags: per-inode flags (%UBIFS_COMPR_FL, %UBIFS_SYNC_FL, etc) ++ * @data_len: inode data length ++ * @xattr_cnt: count of extended attributes this inode has ++ * @xattr_size: summarized size of all extended attributes in bytes ++ * @padding1: reserved for future, zeroes ++ * @xattr_names: sum of lengths of all extended attribute names belonging to ++ * this inode ++ * @compr_type: compression type used for this inode ++ * @padding2: reserved for future, zeroes ++ * @data: data attached to the inode ++ * ++ * Note, even though inode compression type is defined by @compr_type, some ++ * nodes of this inode may be compressed with different compressor - this ++ * happens if compression type is changed while the inode already has data ++ * nodes. But @compr_type will be used for further writes to the inode. ++ * ++ * Note, do not forget to amend 'zero_ino_node_unused()' function when changing ++ * the padding fields. 
++ */ ++struct ubifs_ino_node { ++ struct ubifs_ch ch; ++ __u8 key[UBIFS_MAX_KEY_LEN]; ++ __le64 creat_sqnum; ++ __le64 size; ++ __le64 atime_sec; ++ __le64 ctime_sec; ++ __le64 mtime_sec; ++ __le32 atime_nsec; ++ __le32 ctime_nsec; ++ __le32 mtime_nsec; ++ __le32 nlink; ++ __le32 uid; ++ __le32 gid; ++ __le32 mode; ++ __le32 flags; ++ __le32 data_len; ++ __le32 xattr_cnt; ++ __le32 xattr_size; ++ __u8 padding1[4]; /* Watch 'zero_ino_node_unused()' if changing! */ ++ __le32 xattr_names; ++ __le16 compr_type; ++ __u8 padding2[26]; /* Watch 'zero_ino_node_unused()' if changing! */ ++ __u8 data[]; ++} __attribute__ ((packed)); ++ ++/** ++ * struct ubifs_dent_node - directory entry node. ++ * @ch: common header ++ * @key: node key ++ * @inum: target inode number ++ * @padding1: reserved for future, zeroes ++ * @type: type of the target inode (%UBIFS_ITYPE_REG, %UBIFS_ITYPE_DIR, etc) ++ * @nlen: name length ++ * @padding2: reserved for future, zeroes ++ * @name: zero-terminated name ++ * ++ * Note, do not forget to amend 'zero_dent_node_unused()' function when ++ * changing the padding fields. ++ */ ++struct ubifs_dent_node { ++ struct ubifs_ch ch; ++ __u8 key[UBIFS_MAX_KEY_LEN]; ++ __le64 inum; ++ __u8 padding1; ++ __u8 type; ++ __le16 nlen; ++ __u8 padding2[4]; /* Watch 'zero_dent_node_unused()' if changing! */ ++ __u8 name[]; ++} __attribute__ ((packed)); ++ ++/** ++ * struct ubifs_data_node - data node. ++ * @ch: common header ++ * @key: node key ++ * @size: uncompressed data size in bytes ++ * @compr_type: compression type (%UBIFS_COMPR_NONE, %UBIFS_COMPR_LZO, etc) ++ * @padding: reserved for future, zeroes ++ * @data: data ++ * ++ * Note, do not forget to amend 'zero_data_node_unused()' function when ++ * changing the padding fields. ++ */ ++struct ubifs_data_node { ++ struct ubifs_ch ch; ++ __u8 key[UBIFS_MAX_KEY_LEN]; ++ __le32 size; ++ __le16 compr_type; ++ __u8 padding[2]; /* Watch 'zero_data_node_unused()' if changing! 
*/ ++ __u8 data[]; ++} __attribute__ ((packed)); ++ ++/** ++ * struct ubifs_trun_node - truncation node. ++ * @ch: common header ++ * @inum: truncated inode number ++ * @padding: reserved for future, zeroes ++ * @old_size: size before truncation ++ * @new_size: size after truncation ++ * ++ * This node exists only in the journal and never goes to the main area. Note, ++ * do not forget to amend 'zero_trun_node_unused()' function when changing the ++ * padding fields. ++ */ ++struct ubifs_trun_node { ++ struct ubifs_ch ch; ++ __le32 inum; ++ __u8 padding[12]; /* Watch 'zero_trun_node_unused()' if changing! */ ++ __le64 old_size; ++ __le64 new_size; ++} __attribute__ ((packed)); ++ ++/** ++ * struct ubifs_pad_node - padding node. ++ * @ch: common header ++ * @pad_len: how many bytes after this node are unused (because padded) ++ * @padding: reserved for future, zeroes ++ */ ++struct ubifs_pad_node { ++ struct ubifs_ch ch; ++ __le32 pad_len; ++} __attribute__ ((packed)); ++ ++/** ++ * struct ubifs_sb_node - superblock node. ++ * @ch: common header ++ * @padding: reserved for future, zeroes ++ * @key_hash: type of hash function used in keys ++ * @key_fmt: format of the key ++ * @flags: file-system flags (%UBIFS_FLG_BIGLPT, etc) ++ * @min_io_size: minimal input/output unit size ++ * @leb_size: logical eraseblock size in bytes ++ * @leb_cnt: count of LEBs used by file-system ++ * @max_leb_cnt: maximum count of LEBs used by file-system ++ * @max_bud_bytes: maximum amount of data stored in buds ++ * @log_lebs: log size in logical eraseblocks ++ * @lpt_lebs: number of LEBs used for lprops table ++ * @orph_lebs: number of LEBs used for recording orphans ++ * @jhead_cnt: count of journal heads ++ * @fanout: tree fanout (max. 
number of links per indexing node) ++ * @lsave_cnt: number of LEB numbers in LPT's save table ++ * @fmt_version: UBIFS on-flash format version ++ * @default_compr: default compression ++ * @padding1: reserved for future, zeroes ++ * @rp_uid: reserve pool UID ++ * @rp_gid: reserve pool GID ++ * @rp_size: size of the reserved pool in bytes ++ * @padding2: reserved for future, zeroes ++ * @time_gran: time granularity in nanoseconds ++ * @uuid: UUID generated when the file system image was created ++ * @ro_compat_version: UBIFS R/O compatibility version ++ */ ++struct ubifs_sb_node { ++ struct ubifs_ch ch; ++ __u8 padding[2]; ++ __u8 key_hash; ++ __u8 key_fmt; ++ __le32 flags; ++ __le32 min_io_size; ++ __le32 leb_size; ++ __le32 leb_cnt; ++ __le32 max_leb_cnt; ++ __le64 max_bud_bytes; ++ __le32 log_lebs; ++ __le32 lpt_lebs; ++ __le32 orph_lebs; ++ __le32 jhead_cnt; ++ __le32 fanout; ++ __le32 lsave_cnt; ++ __le32 fmt_version; ++ __le16 default_compr; ++ __u8 padding1[2]; ++ __le32 rp_uid; ++ __le32 rp_gid; ++ __le64 rp_size; ++ __le32 time_gran; ++ __u8 uuid[16]; ++ __le32 ro_compat_version; ++ __u8 padding2[3968]; ++} __attribute__ ((packed)); ++ ++/** ++ * struct ubifs_mst_node - master node. 
++ * @ch: common header ++ * @highest_inum: highest inode number in the committed index ++ * @cmt_no: commit number ++ * @flags: various flags (%UBIFS_MST_DIRTY, etc) ++ * @log_lnum: start of the log ++ * @root_lnum: LEB number of the root indexing node ++ * @root_offs: offset within @root_lnum ++ * @root_len: root indexing node length ++ * @gc_lnum: LEB reserved for garbage collection (%-1 value means the LEB was ++ * not reserved and should be reserved on mount) ++ * @ihead_lnum: LEB number of index head ++ * @ihead_offs: offset of index head ++ * @index_size: size of index on flash ++ * @total_free: total free space in bytes ++ * @total_dirty: total dirty space in bytes ++ * @total_used: total used space in bytes (includes only data LEBs) ++ * @total_dead: total dead space in bytes (includes only data LEBs) ++ * @total_dark: total dark space in bytes (includes only data LEBs) ++ * @lpt_lnum: LEB number of LPT root nnode ++ * @lpt_offs: offset of LPT root nnode ++ * @nhead_lnum: LEB number of LPT head ++ * @nhead_offs: offset of LPT head ++ * @ltab_lnum: LEB number of LPT's own lprops table ++ * @ltab_offs: offset of LPT's own lprops table ++ * @lsave_lnum: LEB number of LPT's save table (big model only) ++ * @lsave_offs: offset of LPT's save table (big model only) ++ * @lscan_lnum: LEB number of last LPT scan ++ * @empty_lebs: number of empty logical eraseblocks ++ * @idx_lebs: number of indexing logical eraseblocks ++ * @leb_cnt: count of LEBs used by file-system ++ * @padding: reserved for future, zeroes ++ */ ++struct ubifs_mst_node { ++ struct ubifs_ch ch; ++ __le64 highest_inum; ++ __le64 cmt_no; ++ __le32 flags; ++ __le32 log_lnum; ++ __le32 root_lnum; ++ __le32 root_offs; ++ __le32 root_len; ++ __le32 gc_lnum; ++ __le32 ihead_lnum; ++ __le32 ihead_offs; ++ __le64 index_size; ++ __le64 total_free; ++ __le64 total_dirty; ++ __le64 total_used; ++ __le64 total_dead; ++ __le64 total_dark; ++ __le32 lpt_lnum; ++ __le32 lpt_offs; ++ __le32 nhead_lnum; ++ __le32 
nhead_offs; ++ __le32 ltab_lnum; ++ __le32 ltab_offs; ++ __le32 lsave_lnum; ++ __le32 lsave_offs; ++ __le32 lscan_lnum; ++ __le32 empty_lebs; ++ __le32 idx_lebs; ++ __le32 leb_cnt; ++ __u8 padding[344]; ++} __attribute__ ((packed)); ++ ++/** ++ * struct ubifs_ref_node - logical eraseblock reference node. ++ * @ch: common header ++ * @lnum: the referred logical eraseblock number ++ * @offs: start offset in the referred LEB ++ * @jhead: journal head number ++ * @padding: reserved for future, zeroes ++ */ ++struct ubifs_ref_node { ++ struct ubifs_ch ch; ++ __le32 lnum; ++ __le32 offs; ++ __le32 jhead; ++ __u8 padding[28]; ++} __attribute__ ((packed)); ++ ++/** ++ * struct ubifs_branch - key/reference/length branch ++ * @lnum: LEB number of the target node ++ * @offs: offset within @lnum ++ * @len: target node length ++ * @key: key ++ */ ++struct ubifs_branch { ++ __le32 lnum; ++ __le32 offs; ++ __le32 len; ++ __u8 key[]; ++} __attribute__ ((packed)); ++ ++/** ++ * struct ubifs_idx_node - indexing node. ++ * @ch: common header ++ * @child_cnt: number of child index nodes ++ * @level: tree level ++ * @branches: LEB number / offset / length / key branches ++ */ ++struct ubifs_idx_node { ++ struct ubifs_ch ch; ++ __le16 child_cnt; ++ __le16 level; ++ __u8 branches[]; ++} __attribute__ ((packed)); ++ ++/** ++ * struct ubifs_cs_node - commit start node. ++ * @ch: common header ++ * @cmt_no: commit number ++ */ ++struct ubifs_cs_node { ++ struct ubifs_ch ch; ++ __le64 cmt_no; ++} __attribute__ ((packed)); ++ ++/** ++ * struct ubifs_orph_node - orphan node. 
++ * @ch: common header ++ * @cmt_no: commit number (also top bit is set on the last node of the commit) ++ * @inos: inode numbers of orphans ++ */ ++struct ubifs_orph_node { ++ struct ubifs_ch ch; ++ __le64 cmt_no; ++ __le64 inos[]; ++} __attribute__ ((packed)); ++ ++#endif /* __UBIFS_MEDIA_H__ */ +diff -Nurd linux-2.6.24/fs/ubifs/ubifs.h ubifs-v2.6.24/fs/ubifs/ubifs.h +--- linux-2.6.24/fs/ubifs/ubifs.h 1970-01-01 02:00:00.000000000 +0200 ++++ ubifs-v2.6.24/fs/ubifs/ubifs.h 2009-04-07 17:14:47.000000000 +0200 +@@ -0,0 +1,1729 @@ ++/* ++ * This file is part of UBIFS. ++ * ++ * Copyright (C) 2006-2008 Nokia Corporation ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published by ++ * the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, write to the Free Software Foundation, Inc., 51 ++ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ * ++ * Authors: Artem Bityutskiy (Битюцкий Артём) ++ * Adrian Hunter ++ */ ++ ++#ifndef __UBIFS_H__ ++#define __UBIFS_H__ ++ ++#include <asm/div64.h> ++#include <linux/statfs.h> ++#include <linux/fs.h> ++#include <linux/err.h> ++#include <linux/sched.h> ++#include <linux/vmalloc.h> ++#include <linux/spinlock.h> ++#include <linux/mutex.h> ++#include <linux/rwsem.h> ++#include <linux/mtd/ubi.h> ++#include <linux/pagemap.h> ++#include <linux/backing-dev.h> ++#include "ubifs-media.h" ++ ++/* Version of this UBIFS implementation */ ++#define UBIFS_VERSION 1 ++ ++/* Normal UBIFS messages */ ++#define ubifs_msg(fmt, ...) 
\ ++ printk(KERN_NOTICE "UBIFS: " fmt "\n", ##__VA_ARGS__) ++/* UBIFS error messages */ ++#define ubifs_err(fmt, ...) \ ++ printk(KERN_ERR "UBIFS error (pid %d): %s: " fmt "\n", current->pid, \ ++ __func__, ##__VA_ARGS__) ++/* UBIFS warning messages */ ++#define ubifs_warn(fmt, ...) \ ++ printk(KERN_WARNING "UBIFS warning (pid %d): %s: " fmt "\n", \ ++ current->pid, __func__, ##__VA_ARGS__) ++ ++/* UBIFS file system VFS magic number */ ++#define UBIFS_SUPER_MAGIC 0x24051905 ++ ++/* Number of UBIFS blocks per VFS page */ ++#define UBIFS_BLOCKS_PER_PAGE (PAGE_CACHE_SIZE / UBIFS_BLOCK_SIZE) ++#define UBIFS_BLOCKS_PER_PAGE_SHIFT (PAGE_CACHE_SHIFT - UBIFS_BLOCK_SHIFT) ++ ++/* "File system end of life" sequence number watermark */ ++#define SQNUM_WARN_WATERMARK 0xFFFFFFFF00000000ULL ++#define SQNUM_WATERMARK 0xFFFFFFFFFF000000ULL ++ ++/* ++ * Minimum amount of LEBs reserved for the index. At present the index needs at ++ * least 2 LEBs: one for the index head and one for in-the-gaps method (which ++ * currently does not cater for the index head and so excludes it from ++ * consideration). ++ */ ++#define MIN_INDEX_LEBS 2 ++ ++/* Minimum amount of data UBIFS writes to the flash */ ++#define MIN_WRITE_SZ (UBIFS_DATA_NODE_SZ + 8) ++ ++/* ++ * Currently we do not support inode number overlapping and re-using, so this ++ * watermark defines dangerous inode number level. This should be fixed later, ++ * although it is difficult to exceed current limit. Another option is to use ++ * 64-bit inode numbers, but this means more overhead. ++ */ ++#define INUM_WARN_WATERMARK 0xFFF00000 ++#define INUM_WATERMARK 0xFFFFFF00 ++ ++/* Largest key size supported in this implementation */ ++#define CUR_MAX_KEY_LEN UBIFS_SK_LEN ++ ++/* Maximum number of entries in each LPT (LEB category) heap */ ++#define LPT_HEAP_SZ 256 ++ ++/* ++ * Background thread name pattern. The numbers are UBI device and volume ++ * numbers. 
++ */ ++#define BGT_NAME_PATTERN "ubifs_bgt%d_%d" ++ ++/* Default write-buffer synchronization timeout (5 secs) */ ++#define DEFAULT_WBUF_TIMEOUT (5 * HZ) ++ ++/* Maximum possible inode number (only 32-bit inodes are supported now) */ ++#define MAX_INUM 0xFFFFFFFF ++ ++/* Number of non-data journal heads */ ++#define NONDATA_JHEADS_CNT 2 ++ ++/* Garbage collector head */ ++#define GCHD 0 ++/* Base journal head number */ ++#define BASEHD 1 ++/* First "general purpose" journal head */ ++#define DATAHD 2 ++ ++/* 'No change' value for 'ubifs_change_lp()' */ ++#define LPROPS_NC 0x80000001 ++ ++/* ++ * There is no notion of truncation key because truncation nodes do not exist ++ * in TNC. However, when replaying, it is handy to introduce fake "truncation" ++ * keys for truncation nodes because the code becomes simpler. So we define ++ * %UBIFS_TRUN_KEY type. ++ */ ++#define UBIFS_TRUN_KEY UBIFS_KEY_TYPES_CNT ++ ++/* ++ * How much a directory entry/extended attribute entry adds to the parent/host ++ * inode. ++ */ ++#define CALC_DENT_SIZE(name_len) ALIGN(UBIFS_DENT_NODE_SZ + (name_len) + 1, 8) ++ ++/* How much an extended attribute adds to the host inode */ ++#define CALC_XATTR_BYTES(data_len) ALIGN(UBIFS_INO_NODE_SZ + (data_len) + 1, 8) ++ ++/* ++ * Znodes which were not touched for 'OLD_ZNODE_AGE' seconds are considered ++ * "old", and znode which were touched last 'YOUNG_ZNODE_AGE' seconds ago are ++ * considered "young". This is used by shrinker when selecting znode to trim ++ * off. ++ */ ++#define OLD_ZNODE_AGE 20 ++#define YOUNG_ZNODE_AGE 5 ++ ++/* ++ * Some compressors, like LZO, may end up with more data than the input buffer. ++ * So UBIFS always allocates larger output buffer, to be sure the compressor ++ * will not corrupt memory in case of worst case compression. 
++ */ ++#define WORST_COMPR_FACTOR 2 ++ ++/* Maximum expected tree height for use by bottom_up_buf */ ++#define BOTTOM_UP_HEIGHT 64 ++ ++/* Maximum number of data nodes to bulk-read */ ++#define UBIFS_MAX_BULK_READ 32 ++ ++/* ++ * Lockdep classes for UBIFS inode @ui_mutex. ++ */ ++enum { ++ WB_MUTEX_1 = 0, ++ WB_MUTEX_2 = 1, ++ WB_MUTEX_3 = 2, ++}; ++ ++/* ++ * Znode flags (actually, bit numbers which store the flags). ++ * ++ * DIRTY_ZNODE: znode is dirty ++ * COW_ZNODE: znode is being committed and a new instance of this znode has to ++ * be created before changing this znode ++ * OBSOLETE_ZNODE: znode is obsolete, which means it was deleted, but it is ++ * still in the commit list and the ongoing commit operation ++ * will commit it, and delete this znode after it is done ++ */ ++enum { ++ DIRTY_ZNODE = 0, ++ COW_ZNODE = 1, ++ OBSOLETE_ZNODE = 2, ++}; ++ ++/* ++ * Commit states. ++ * ++ * COMMIT_RESTING: commit is not wanted ++ * COMMIT_BACKGROUND: background commit has been requested ++ * COMMIT_REQUIRED: commit is required ++ * COMMIT_RUNNING_BACKGROUND: background commit is running ++ * COMMIT_RUNNING_REQUIRED: commit is running and it is required ++ * COMMIT_BROKEN: commit failed ++ */ ++enum { ++ COMMIT_RESTING = 0, ++ COMMIT_BACKGROUND, ++ COMMIT_REQUIRED, ++ COMMIT_RUNNING_BACKGROUND, ++ COMMIT_RUNNING_REQUIRED, ++ COMMIT_BROKEN, ++}; ++ ++/* ++ * 'ubifs_scan_a_node()' return values. ++ * ++ * SCANNED_GARBAGE: scanned garbage ++ * SCANNED_EMPTY_SPACE: scanned empty space ++ * SCANNED_A_NODE: scanned a valid node ++ * SCANNED_A_CORRUPT_NODE: scanned a corrupted node ++ * SCANNED_A_BAD_PAD_NODE: scanned a padding node with invalid pad length ++ * ++ * Greater than zero means: 'scanned that number of padding bytes' ++ */ ++enum { ++ SCANNED_GARBAGE = 0, ++ SCANNED_EMPTY_SPACE = -1, ++ SCANNED_A_NODE = -2, ++ SCANNED_A_CORRUPT_NODE = -3, ++ SCANNED_A_BAD_PAD_NODE = -4, ++}; ++ ++/* ++ * LPT cnode flag bits. 
++ * ++ * DIRTY_CNODE: cnode is dirty ++ * COW_CNODE: cnode is being committed and must be copied before writing ++ * OBSOLETE_CNODE: cnode is being committed and has been copied (or deleted), ++ * so it can (and must) be freed when the commit is finished ++ */ ++enum { ++ DIRTY_CNODE = 0, ++ COW_CNODE = 1, ++ OBSOLETE_CNODE = 2, ++}; ++ ++/* ++ * Dirty flag bits (lpt_drty_flgs) for LPT special nodes. ++ * ++ * LTAB_DIRTY: ltab node is dirty ++ * LSAVE_DIRTY: lsave node is dirty ++ */ ++enum { ++ LTAB_DIRTY = 1, ++ LSAVE_DIRTY = 2, ++}; ++ ++/* ++ * Return codes used by the garbage collector. ++ * @LEB_FREED: the logical eraseblock was freed and is ready to use ++ * @LEB_FREED_IDX: indexing LEB was freed and can be used only after the commit ++ * @LEB_RETAINED: the logical eraseblock was freed and retained for GC purposes ++ */ ++enum { ++ LEB_FREED, ++ LEB_FREED_IDX, ++ LEB_RETAINED, ++}; ++ ++/** ++ * struct ubifs_old_idx - index node obsoleted since last commit start. ++ * @rb: rb-tree node ++ * @lnum: LEB number of obsoleted index node ++ * @offs: offset of obsoleted index node ++ */ ++struct ubifs_old_idx { ++ struct rb_node rb; ++ int lnum; ++ int offs; ++}; ++ ++/* The below union makes it easier to deal with keys */ ++union ubifs_key { ++ uint8_t u8[CUR_MAX_KEY_LEN]; ++ uint32_t u32[CUR_MAX_KEY_LEN/4]; ++ uint64_t u64[CUR_MAX_KEY_LEN/8]; ++ __le32 j32[CUR_MAX_KEY_LEN/4]; ++}; ++ ++/** ++ * struct ubifs_scan_node - UBIFS scanned node information. ++ * @list: list of scanned nodes ++ * @key: key of node scanned (if it has one) ++ * @sqnum: sequence number ++ * @type: type of node scanned ++ * @offs: offset with LEB of node scanned ++ * @len: length of node scanned ++ * @node: raw node ++ */ ++struct ubifs_scan_node { ++ struct list_head list; ++ union ubifs_key key; ++ unsigned long long sqnum; ++ int type; ++ int offs; ++ int len; ++ void *node; ++}; ++ ++/** ++ * struct ubifs_scan_leb - UBIFS scanned LEB information. 
++ * @lnum: logical eraseblock number ++ * @nodes_cnt: number of nodes scanned ++ * @nodes: list of struct ubifs_scan_node ++ * @endpt: end point (and therefore the start of empty space) ++ * @ecc: read returned -EBADMSG ++ * @buf: buffer containing entire LEB scanned ++ */ ++struct ubifs_scan_leb { ++ int lnum; ++ int nodes_cnt; ++ struct list_head nodes; ++ int endpt; ++ int ecc; ++ void *buf; ++}; ++ ++/** ++ * struct ubifs_gced_idx_leb - garbage-collected indexing LEB. ++ * @list: list ++ * @lnum: LEB number ++ * @unmap: OK to unmap this LEB ++ * ++ * This data structure is used to temporary store garbage-collected indexing ++ * LEBs - they are not released immediately, but only after the next commit. ++ * This is needed to guarantee recoverability. ++ */ ++struct ubifs_gced_idx_leb { ++ struct list_head list; ++ int lnum; ++ int unmap; ++}; ++ ++/** ++ * struct ubifs_inode - UBIFS in-memory inode description. ++ * @vfs_inode: VFS inode description object ++ * @creat_sqnum: sequence number at time of creation ++ * @del_cmtno: commit number corresponding to the time the inode was deleted, ++ * protected by @c->commit_sem; ++ * @xattr_size: summarized size of all extended attributes in bytes ++ * @xattr_cnt: count of extended attributes this inode has ++ * @xattr_names: sum of lengths of all extended attribute names belonging to ++ * this inode ++ * @dirty: non-zero if the inode is dirty ++ * @xattr: non-zero if this is an extended attribute inode ++ * @bulk_read: non-zero if bulk-read should be used ++ * @ui_mutex: serializes inode write-back with the rest of VFS operations, ++ * serializes "clean <-> dirty" state changes, serializes bulk-read, ++ * protects @dirty, @bulk_read, @ui_size, and @xattr_size ++ * @ui_lock: protects @synced_i_size ++ * @synced_i_size: synchronized size of inode, i.e. 
the value of inode size ++ * currently stored on the flash; used only for regular file ++ * inodes ++ * @ui_size: inode size used by UBIFS when writing to flash ++ * @flags: inode flags (@UBIFS_COMPR_FL, etc) ++ * @compr_type: default compression type used for this inode ++ * @last_page_read: page number of last page read (for bulk read) ++ * @read_in_a_row: number of consecutive pages read in a row (for bulk read) ++ * @data_len: length of the data attached to the inode ++ * @data: inode's data ++ * ++ * @ui_mutex exists for two main reasons. At first it prevents inodes from ++ * being written back while UBIFS changing them, being in the middle of an VFS ++ * operation. This way UBIFS makes sure the inode fields are consistent. For ++ * example, in 'ubifs_rename()' we change 3 inodes simultaneously, and ++ * write-back must not write any of them before we have finished. ++ * ++ * The second reason is budgeting - UBIFS has to budget all operations. If an ++ * operation is going to mark an inode dirty, it has to allocate budget for ++ * this. It cannot just mark it dirty because there is no guarantee there will ++ * be enough flash space to write the inode back later. This means UBIFS has ++ * to have full control over inode "clean <-> dirty" transitions (and pages ++ * actually). But unfortunately, VFS marks inodes dirty in many places, and it ++ * does not ask the file-system if it is allowed to do so (there is a notifier, ++ * but it is not enough), i.e., there is no mechanism to synchronize with this. ++ * So UBIFS has its own inode dirty flag and its own mutex to serialize ++ * "clean <-> dirty" transitions. ++ * ++ * The @synced_i_size field is used to make sure we never write pages which are ++ * beyond last synchronized inode size. See 'ubifs_writepage()' for more ++ * information. ++ * ++ * The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses ++ * @ui_size instead of @inode->i_size. 
The reason for this is that UBIFS cannot ++ * make sure @inode->i_size is always changed under @ui_mutex, because it ++ * cannot call 'vmtruncate()' with @ui_mutex locked, because it would deadlock ++ * with 'ubifs_writepage()' (see file.c). All the other inode fields are ++ * changed under @ui_mutex, so they do not need "shadow" fields. Note, one ++ * could consider to rework locking and base it on "shadow" fields. ++ */ ++struct ubifs_inode { ++ struct inode vfs_inode; ++ unsigned long long creat_sqnum; ++ unsigned long long del_cmtno; ++ unsigned int xattr_size; ++ unsigned int xattr_cnt; ++ unsigned int xattr_names; ++ unsigned int dirty:1; ++ unsigned int xattr:1; ++ unsigned int bulk_read:1; ++ unsigned int compr_type:2; ++ struct mutex ui_mutex; ++ spinlock_t ui_lock; ++ loff_t synced_i_size; ++ loff_t ui_size; ++ int flags; ++ pgoff_t last_page_read; ++ pgoff_t read_in_a_row; ++ int data_len; ++ void *data; ++}; ++ ++/** ++ * struct ubifs_unclean_leb - records a LEB recovered under read-only mode. ++ * @list: list ++ * @lnum: LEB number of recovered LEB ++ * @endpt: offset where recovery ended ++ * ++ * This structure records a LEB identified during recovery that needs to be ++ * cleaned but was not because UBIFS was mounted read-only. The information ++ * is used to clean the LEB when remounting to read-write mode. ++ */ ++struct ubifs_unclean_leb { ++ struct list_head list; ++ int lnum; ++ int endpt; ++}; ++ ++/* ++ * LEB properties flags. 
++ * ++ * LPROPS_UNCAT: not categorized ++ * LPROPS_DIRTY: dirty > free, dirty >= @c->dead_wm, not index ++ * LPROPS_DIRTY_IDX: dirty + free > @c->min_idx_node_sz and index ++ * LPROPS_FREE: free > 0, dirty < @c->dead_wm, not empty, not index ++ * LPROPS_HEAP_CNT: number of heaps used for storing categorized LEBs ++ * LPROPS_EMPTY: LEB is empty, not taken ++ * LPROPS_FREEABLE: free + dirty == leb_size, not index, not taken ++ * LPROPS_FRDI_IDX: free + dirty == leb_size and index, may be taken ++ * LPROPS_CAT_MASK: mask for the LEB categories above ++ * LPROPS_TAKEN: LEB was taken (this flag is not saved on the media) ++ * LPROPS_INDEX: LEB contains indexing nodes (this flag also exists on flash) ++ */ ++enum { ++ LPROPS_UNCAT = 0, ++ LPROPS_DIRTY = 1, ++ LPROPS_DIRTY_IDX = 2, ++ LPROPS_FREE = 3, ++ LPROPS_HEAP_CNT = 3, ++ LPROPS_EMPTY = 4, ++ LPROPS_FREEABLE = 5, ++ LPROPS_FRDI_IDX = 6, ++ LPROPS_CAT_MASK = 15, ++ LPROPS_TAKEN = 16, ++ LPROPS_INDEX = 32, ++}; ++ ++/** ++ * struct ubifs_lprops - logical eraseblock properties. ++ * @free: amount of free space in bytes ++ * @dirty: amount of dirty space in bytes ++ * @flags: LEB properties flags (see above) ++ * @lnum: LEB number ++ * @list: list of same-category lprops (for LPROPS_EMPTY and LPROPS_FREEABLE) ++ * @hpos: heap position in heap of same-category lprops (other categories) ++ */ ++struct ubifs_lprops { ++ int free; ++ int dirty; ++ int flags; ++ int lnum; ++ union { ++ struct list_head list; ++ int hpos; ++ }; ++}; ++ ++/** ++ * struct ubifs_lpt_lprops - LPT logical eraseblock properties. ++ * @free: amount of free space in bytes ++ * @dirty: amount of dirty space in bytes ++ * @tgc: trivial GC flag (1 => unmap after commit end) ++ * @cmt: commit flag (1 => reserved for commit) ++ */ ++struct ubifs_lpt_lprops { ++ int free; ++ int dirty; ++ unsigned tgc:1; ++ unsigned cmt:1; ++}; ++ ++/** ++ * struct ubifs_lp_stats - statistics of eraseblocks in the main area. 
++ * @empty_lebs: number of empty LEBs ++ * @taken_empty_lebs: number of taken LEBs ++ * @idx_lebs: number of indexing LEBs ++ * @total_free: total free space in bytes (includes all LEBs) ++ * @total_dirty: total dirty space in bytes (includes all LEBs) ++ * @total_used: total used space in bytes (does not include index LEBs) ++ * @total_dead: total dead space in bytes (does not include index LEBs) ++ * @total_dark: total dark space in bytes (does not include index LEBs) ++ * ++ * The @taken_empty_lebs field counts the LEBs that are in the transient state ++ * of having been "taken" for use but not yet written to. @taken_empty_lebs is ++ * needed to account correctly for @gc_lnum, otherwise @empty_lebs could be ++ * used by itself (in which case 'unused_lebs' would be a better name). In the ++ * case of @gc_lnum, it is "taken" at mount time or whenever a LEB is retained ++ * by GC, but unlike other empty LEBs that are "taken", it may not be written ++ * straight away (i.e. before the next commit start or unmount), so either ++ * @gc_lnum must be specially accounted for, or the current approach followed ++ * i.e. count it under @taken_empty_lebs. ++ * ++ * @empty_lebs includes @taken_empty_lebs. ++ * ++ * @total_used, @total_dead and @total_dark fields do not account indexing ++ * LEBs. ++ */ ++struct ubifs_lp_stats { ++ int empty_lebs; ++ int taken_empty_lebs; ++ int idx_lebs; ++ long long total_free; ++ long long total_dirty; ++ long long total_used; ++ long long total_dead; ++ long long total_dark; ++}; ++ ++struct ubifs_nnode; ++ ++/** ++ * struct ubifs_cnode - LEB Properties Tree common node. 
++ * @parent: parent nnode ++ * @cnext: next cnode to commit ++ * @flags: flags (%DIRTY_CNODE, %COW_CNODE or %OBSOLETE_CNODE) ++ * @iip: index in parent ++ * @level: level in the tree (zero for pnodes, greater than zero for nnodes) ++ * @num: node number ++ */ ++struct ubifs_cnode { ++ struct ubifs_nnode *parent; ++ struct ubifs_cnode *cnext; ++ unsigned long flags; ++ int iip; ++ int level; ++ int num; ++}; ++ ++/** ++ * struct ubifs_pnode - LEB Properties Tree leaf node. ++ * @parent: parent nnode ++ * @cnext: next cnode to commit ++ * @flags: flags (%DIRTY_CNODE, %COW_CNODE or %OBSOLETE_CNODE) ++ * @iip: index in parent ++ * @level: level in the tree (always zero for pnodes) ++ * @num: node number ++ * @lprops: LEB properties array ++ */ ++struct ubifs_pnode { ++ struct ubifs_nnode *parent; ++ struct ubifs_cnode *cnext; ++ unsigned long flags; ++ int iip; ++ int level; ++ int num; ++ struct ubifs_lprops lprops[UBIFS_LPT_FANOUT]; ++}; ++ ++/** ++ * struct ubifs_nbranch - LEB Properties Tree internal node branch. ++ * @lnum: LEB number of child ++ * @offs: offset of child ++ * @nnode: nnode child ++ * @pnode: pnode child ++ * @cnode: cnode child ++ */ ++struct ubifs_nbranch { ++ int lnum; ++ int offs; ++ union { ++ struct ubifs_nnode *nnode; ++ struct ubifs_pnode *pnode; ++ struct ubifs_cnode *cnode; ++ }; ++}; ++ ++/** ++ * struct ubifs_nnode - LEB Properties Tree internal node. ++ * @parent: parent nnode ++ * @cnext: next cnode to commit ++ * @flags: flags (%DIRTY_CNODE, %COW_CNODE or %OBSOLETE_CNODE) ++ * @iip: index in parent ++ * @level: level in the tree (always greater than zero for nnodes) ++ * @num: node number ++ * @nbranch: branches to child nodes ++ */ ++struct ubifs_nnode { ++ struct ubifs_nnode *parent; ++ struct ubifs_cnode *cnext; ++ unsigned long flags; ++ int iip; ++ int level; ++ int num; ++ struct ubifs_nbranch nbranch[UBIFS_LPT_FANOUT]; ++}; ++ ++/** ++ * struct ubifs_lpt_heap - heap of categorized lprops. 
++ * @arr: heap array ++ * @cnt: number in heap ++ * @max_cnt: maximum number allowed in heap ++ * ++ * There are %LPROPS_HEAP_CNT heaps. ++ */ ++struct ubifs_lpt_heap { ++ struct ubifs_lprops **arr; ++ int cnt; ++ int max_cnt; ++}; ++ ++/* ++ * Return codes for LPT scan callback function. ++ * ++ * LPT_SCAN_CONTINUE: continue scanning ++ * LPT_SCAN_ADD: add the LEB properties scanned to the tree in memory ++ * LPT_SCAN_STOP: stop scanning ++ */ ++enum { ++ LPT_SCAN_CONTINUE = 0, ++ LPT_SCAN_ADD = 1, ++ LPT_SCAN_STOP = 2, ++}; ++ ++struct ubifs_info; ++ ++/* Callback used by the 'ubifs_lpt_scan_nolock()' function */ ++typedef int (*ubifs_lpt_scan_callback)(struct ubifs_info *c, ++ const struct ubifs_lprops *lprops, ++ int in_tree, void *data); ++ ++/** ++ * struct ubifs_wbuf - UBIFS write-buffer. ++ * @c: UBIFS file-system description object ++ * @buf: write-buffer (of min. flash I/O unit size) ++ * @lnum: logical eraseblock number the write-buffer points to ++ * @offs: write-buffer offset in this logical eraseblock ++ * @avail: number of bytes available in the write-buffer ++ * @used: number of used bytes in the write-buffer ++ * @dtype: type of data stored in this LEB (%UBI_LONGTERM, %UBI_SHORTTERM, ++ * %UBI_UNKNOWN) ++ * @jhead: journal head the mutex belongs to (note, needed only to shut lockdep ++ * up by 'mutex_lock_nested()). 
++ * @sync_callback: write-buffer synchronization callback ++ * @io_mutex: serializes write-buffer I/O ++ * @lock: serializes @buf, @lnum, @offs, @avail, @used, @next_ino and @inodes ++ * fields ++ * @timer: write-buffer timer ++ * @timeout: timer expire interval in jiffies ++ * @need_sync: it is set if its timer expired and needs sync ++ * @next_ino: points to the next position of the following inode number ++ * @inodes: stores the inode numbers of the nodes which are in wbuf ++ * ++ * The write-buffer synchronization callback is called when the write-buffer is ++ * synchronized in order to notify how much space was wasted due to ++ * write-buffer padding and how much free space is left in the LEB. ++ * ++ * Note: the fields @buf, @lnum, @offs, @avail and @used can be read under ++ * spin-lock or mutex because they are written under both mutex and spin-lock. ++ * @buf is appended to under mutex but overwritten under both mutex and ++ * spin-lock. Thus the data between @buf and @buf + @used can be read under ++ * spinlock. ++ */ ++struct ubifs_wbuf { ++ struct ubifs_info *c; ++ void *buf; ++ int lnum; ++ int offs; ++ int avail; ++ int used; ++ int dtype; ++ int jhead; ++ int (*sync_callback)(struct ubifs_info *c, int lnum, int free, int pad); ++ struct mutex io_mutex; ++ spinlock_t lock; ++ struct timer_list timer; ++ int timeout; ++ int need_sync; ++ int next_ino; ++ ino_t *inodes; ++}; ++ ++/** ++ * struct ubifs_bud - bud logical eraseblock. ++ * @lnum: logical eraseblock number ++ * @start: where the (uncommitted) bud data starts ++ * @jhead: journal head number this bud belongs to ++ * @list: link in the list buds belonging to the same journal head ++ * @rb: link in the tree of all buds ++ */ ++struct ubifs_bud { ++ int lnum; ++ int start; ++ int jhead; ++ struct list_head list; ++ struct rb_node rb; ++}; ++ ++/** ++ * struct ubifs_jhead - journal head. 
++ * @wbuf: head's write-buffer ++ * @buds_list: list of bud LEBs belonging to this journal head ++ * ++ * Note, the @buds_list is protected by the @c->buds_lock. ++ */ ++struct ubifs_jhead { ++ struct ubifs_wbuf wbuf; ++ struct list_head buds_list; ++}; ++ ++/** ++ * struct ubifs_zbranch - key/coordinate/length branch stored in znodes. ++ * @key: key ++ * @znode: znode address in memory ++ * @lnum: LEB number of the target node (indexing node or data node) ++ * @offs: target node offset within @lnum ++ * @len: target node length ++ */ ++struct ubifs_zbranch { ++ union ubifs_key key; ++ union { ++ struct ubifs_znode *znode; ++ void *leaf; ++ }; ++ int lnum; ++ int offs; ++ int len; ++}; ++ ++/** ++ * struct ubifs_znode - in-memory representation of an indexing node. ++ * @parent: parent znode or NULL if it is the root ++ * @cnext: next znode to commit ++ * @flags: znode flags (%DIRTY_ZNODE, %COW_ZNODE or %OBSOLETE_ZNODE) ++ * @time: last access time (seconds) ++ * @level: level of the entry in the TNC tree ++ * @child_cnt: count of child znodes ++ * @iip: index in parent's zbranch array ++ * @alt: lower bound of key range has altered i.e. child inserted at slot 0 ++ * @lnum: LEB number of the corresponding indexing node ++ * @offs: offset of the corresponding indexing node ++ * @len: length of the corresponding indexing node ++ * @zbranch: array of znode branches (@c->fanout elements) ++ */ ++struct ubifs_znode { ++ struct ubifs_znode *parent; ++ struct ubifs_znode *cnext; ++ unsigned long flags; ++ unsigned long time; ++ int level; ++ int child_cnt; ++ int iip; ++ int alt; ++#ifdef CONFIG_UBIFS_FS_DEBUG ++ int lnum, offs, len; ++#endif ++ struct ubifs_zbranch zbranch[]; ++}; ++ ++/** ++ * struct bu_info - bulk-read information. 
++ * @key: first data node key ++ * @zbranch: zbranches of data nodes to bulk read ++ * @buf: buffer to read into ++ * @buf_len: buffer length ++ * @gc_seq: GC sequence number to detect races with GC ++ * @cnt: number of data nodes for bulk read ++ * @blk_cnt: number of data blocks including holes ++ * @eof: end of file reached ++ */ ++struct bu_info { ++ union ubifs_key key; ++ struct ubifs_zbranch zbranch[UBIFS_MAX_BULK_READ]; ++ void *buf; ++ int buf_len; ++ int gc_seq; ++ int cnt; ++ int blk_cnt; ++ int eof; ++}; ++ ++/** ++ * struct ubifs_node_range - node length range description data structure. ++ * @len: fixed node length ++ * @min_len: minimum possible node length ++ * @max_len: maximum possible node length ++ * ++ * If @max_len is %0, the node has fixed length @len. ++ */ ++struct ubifs_node_range { ++ union { ++ int len; ++ int min_len; ++ }; ++ int max_len; ++}; ++ ++/** ++ * struct ubifs_compressor - UBIFS compressor description structure. ++ * @compr_type: compressor type (%UBIFS_COMPR_LZO, etc) ++ * @cc: cryptoapi compressor handle ++ * @comp_mutex: mutex used during compression ++ * @decomp_mutex: mutex used during decompression ++ * @name: compressor name ++ * @capi_name: cryptoapi compressor name ++ */ ++struct ubifs_compressor { ++ int compr_type; ++ struct crypto_comp *cc; ++ struct mutex *comp_mutex; ++ struct mutex *decomp_mutex; ++ const char *name; ++ const char *capi_name; ++}; ++ ++/** ++ * struct ubifs_budget_req - budget requirements of an operation. 
++ * ++ * @fast: non-zero if the budgeting should try to acquire budget quickly and ++ * should not try to call write-back ++ * @recalculate: non-zero if @idx_growth, @data_growth, and @dd_growth fields ++ * have to be re-calculated ++ * @new_page: non-zero if the operation adds a new page ++ * @dirtied_page: non-zero if the operation makes a page dirty ++ * @new_dent: non-zero if the operation adds a new directory entry ++ * @mod_dent: non-zero if the operation removes or modifies an existing ++ * directory entry ++ * @new_ino: non-zero if the operation adds a new inode ++ * @new_ino_d: how much data newly created inode contains ++ * @dirtied_ino: how many inodes the operation makes dirty ++ * @dirtied_ino_d: how much data dirtied inode contains ++ * @idx_growth: how much the index will supposedly grow ++ * @data_growth: how much new data the operation will supposedly add ++ * @dd_growth: how much data that makes other data dirty the operation will ++ * supposedly add ++ * ++ * @idx_growth, @data_growth and @dd_growth are not used in budget request. The ++ * budgeting subsystem caches index and data growth values there to avoid ++ * re-calculating them when the budget is released. However, if @idx_growth is ++ * %-1, it is calculated by the release function using other fields. ++ * ++ * An inode may contain 4KiB of data at max., thus the width of @new_ino_d ++ * is 13 bits, and @dirtied_ino_d - 15, because up to 4 inodes may be made ++ * dirty by the re-name operation. ++ * ++ * Note, UBIFS aligns node lengths to 8-bytes boundary, so the requester has to ++ * make sure the amount of inode data which contribute to @new_ino_d and ++ * @dirtied_ino_d fields are aligned. 
++ */ ++struct ubifs_budget_req { ++ unsigned int fast:1; ++ unsigned int recalculate:1; ++#ifndef UBIFS_DEBUG ++ unsigned int new_page:1; ++ unsigned int dirtied_page:1; ++ unsigned int new_dent:1; ++ unsigned int mod_dent:1; ++ unsigned int new_ino:1; ++ unsigned int new_ino_d:13; ++ unsigned int dirtied_ino:4; ++ unsigned int dirtied_ino_d:15; ++#else ++ /* Not bit-fields to check for overflows */ ++ unsigned int new_page; ++ unsigned int dirtied_page; ++ unsigned int new_dent; ++ unsigned int mod_dent; ++ unsigned int new_ino; ++ unsigned int new_ino_d; ++ unsigned int dirtied_ino; ++ unsigned int dirtied_ino_d; ++#endif ++ int idx_growth; ++ int data_growth; ++ int dd_growth; ++}; ++ ++/** ++ * struct ubifs_orphan - stores the inode number of an orphan. ++ * @rb: rb-tree node of rb-tree of orphans sorted by inode number ++ * @list: list head of list of orphans in order added ++ * @new_list: list head of list of orphans added since the last commit ++ * @cnext: next orphan to commit ++ * @dnext: next orphan to delete ++ * @inum: inode number ++ * @new: %1 => added since the last commit, otherwise %0 ++ */ ++struct ubifs_orphan { ++ struct rb_node rb; ++ struct list_head list; ++ struct list_head new_list; ++ struct ubifs_orphan *cnext; ++ struct ubifs_orphan *dnext; ++ ino_t inum; ++ int new; ++}; ++ ++/** ++ * struct ubifs_mount_opts - UBIFS-specific mount options information. 
++ * @unmount_mode: selected unmount mode (%0 default, %1 normal, %2 fast) ++ * @bulk_read: enable/disable bulk-reads (%0 default, %1 disable, %2 enable) ++ * @chk_data_crc: enable/disable CRC data checking when reading data nodes ++ * (%0 default, %1 disable, %2 enable) ++ * @override_compr: override default compressor (%0 - do not override and use ++ * superblock compressor, %1 - override and use compressor ++ * specified in @compr_type) ++ * @compr_type: compressor type to override the superblock compressor with ++ * (%UBIFS_COMPR_NONE, etc) ++ */ ++struct ubifs_mount_opts { ++ unsigned int unmount_mode:2; ++ unsigned int bulk_read:2; ++ unsigned int chk_data_crc:2; ++ unsigned int override_compr:1; ++ unsigned int compr_type:2; ++}; ++ ++struct ubifs_debug_info; ++ ++/** ++ * struct ubifs_info - UBIFS file-system description data structure ++ * (per-superblock). ++ * @vfs_sb: VFS @struct super_block object ++ * @bdi: backing device info object to make VFS happy and disable read-ahead ++ * ++ * @highest_inum: highest used inode number ++ * @max_sqnum: current global sequence number ++ * @cmt_no: commit number of the last successfully completed commit, protected ++ * by @commit_sem ++ * @cnt_lock: protects @highest_inum and @max_sqnum counters ++ * @fmt_version: UBIFS on-flash format version ++ * @ro_compat_version: R/O compatibility version ++ * @uuid: UUID from super block ++ * ++ * @lhead_lnum: log head logical eraseblock number ++ * @lhead_offs: log head offset ++ * @ltail_lnum: log tail logical eraseblock number (offset is always 0) ++ * @log_mutex: protects the log, @lhead_lnum, @lhead_offs, @ltail_lnum, and ++ * @bud_bytes ++ * @min_log_bytes: minimum required number of bytes in the log ++ * @cmt_bud_bytes: used during commit to temporarily store the amount of ++ * bytes in committed buds ++ * ++ * @buds: tree of all buds indexed by bud LEB number ++ * @bud_bytes: how many bytes of flash is used by buds ++ * @buds_lock: protects the @buds tree, @bud_bytes, and 
per-journal head bud ++ * lists ++ * @jhead_cnt: count of journal heads ++ * @jheads: journal heads (head zero is base head) ++ * @max_bud_bytes: maximum number of bytes allowed in buds ++ * @bg_bud_bytes: number of bud bytes when background commit is initiated ++ * @old_buds: buds to be released after commit ends ++ * @max_bud_cnt: maximum number of buds ++ * ++ * @commit_sem: synchronizes committer with other processes ++ * @cmt_state: commit state ++ * @cs_lock: commit state lock ++ * @cmt_wq: wait queue to sleep on if the log is full and a commit is running ++ * ++ * @big_lpt: flag that LPT is too big to write whole during commit ++ * @no_chk_data_crc: do not check CRCs when reading data nodes (except during ++ * recovery) ++ * @bulk_read: enable bulk-reads ++ * @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc) ++ * @rw_incompat: the media is not R/W compatible ++ * ++ * @tnc_mutex: protects the Tree Node Cache (TNC), @zroot, @cnext, @enext, and ++ * @calc_idx_sz ++ * @zroot: zbranch which points to the root index node and znode ++ * @cnext: next znode to commit ++ * @enext: next znode to commit to empty space ++ * @gap_lebs: array of LEBs used by the in-gaps commit method ++ * @cbuf: commit buffer ++ * @ileb_buf: buffer for commit in-the-gaps method ++ * @ileb_len: length of data in ileb_buf ++ * @ihead_lnum: LEB number of index head ++ * @ihead_offs: offset of index head ++ * @ilebs: pre-allocated index LEBs ++ * @ileb_cnt: number of pre-allocated index LEBs ++ * @ileb_nxt: next pre-allocated index LEBs ++ * @old_idx: tree of index nodes obsoleted since the last commit start ++ * @bottom_up_buf: a buffer which is used by 'dirty_cow_bottom_up()' in tnc.c ++ * ++ * @mst_node: master node ++ * @mst_offs: offset of valid master node ++ * @mst_mutex: protects the master node area, @mst_node, and @mst_offs ++ * ++ * @max_bu_buf_len: maximum bulk-read buffer length ++ * @bu_mutex: protects the pre-allocated bulk-read buffer and @c->bu ++ * @bu: 
pre-allocated bulk-read information ++ * ++ * @log_lebs: number of logical eraseblocks in the log ++ * @log_bytes: log size in bytes ++ * @log_last: last LEB of the log ++ * @lpt_lebs: number of LEBs used for lprops table ++ * @lpt_first: first LEB of the lprops table area ++ * @lpt_last: last LEB of the lprops table area ++ * @orph_lebs: number of LEBs used for the orphan area ++ * @orph_first: first LEB of the orphan area ++ * @orph_last: last LEB of the orphan area ++ * @main_lebs: count of LEBs in the main area ++ * @main_first: first LEB of the main area ++ * @main_bytes: main area size in bytes ++ * ++ * @key_hash_type: type of the key hash ++ * @key_hash: direntry key hash function ++ * @key_fmt: key format ++ * @key_len: key length ++ * @fanout: fanout of the index tree (number of links per indexing node) ++ * ++ * @min_io_size: minimal input/output unit size ++ * @min_io_shift: number of bits in @min_io_size minus one ++ * @leb_size: logical eraseblock size in bytes ++ * @half_leb_size: half LEB size ++ * @idx_leb_size: how many bytes of an LEB are effectively available when it is ++ * used to store indexing nodes (@leb_size - @max_idx_node_sz) ++ * @leb_cnt: count of logical eraseblocks ++ * @max_leb_cnt: maximum count of logical eraseblocks ++ * @old_leb_cnt: count of logical eraseblocks before re-size ++ * @ro_media: the underlying UBI volume is read-only ++ * ++ * @dirty_pg_cnt: number of dirty pages (not used) ++ * @dirty_zn_cnt: number of dirty znodes ++ * @clean_zn_cnt: number of clean znodes ++ * ++ * @budg_idx_growth: amount of bytes budgeted for index growth ++ * @budg_data_growth: amount of bytes budgeted for cached data ++ * @budg_dd_growth: amount of bytes budgeted for cached data that will make ++ * other data dirty ++ * @budg_uncommitted_idx: amount of bytes were budgeted for growth of the index, ++ * but which still have to be taken into account because ++ * the index has not been committed so far ++ * @space_lock: protects 
@budg_idx_growth, @budg_data_growth, @budg_dd_growth, ++ * @budg_uncommitted_idx, @min_idx_lebs, @old_idx_sz, @lst, ++ * @nospace, and @nospace_rp; ++ * @min_idx_lebs: minimum number of LEBs required for the index ++ * @old_idx_sz: size of index on flash ++ * @calc_idx_sz: temporary variable which is used to calculate new index size ++ * (contains accurate new index size at end of TNC commit start) ++ * @lst: lprops statistics ++ * @nospace: non-zero if the file-system does not have flash space (used as ++ * optimization) ++ * @nospace_rp: the same as @nospace, but additionally means that even reserved ++ * pool is full ++ * ++ * @page_budget: budget for a page ++ * @inode_budget: budget for an inode ++ * @dent_budget: budget for a directory entry ++ * ++ * @ref_node_alsz: size of the LEB reference node aligned to the min. flash ++ * I/O unit ++ * @mst_node_alsz: master node aligned size ++ * @min_idx_node_sz: minimum indexing node aligned on 8-bytes boundary ++ * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary ++ * @max_inode_sz: maximum possible inode size in bytes ++ * @max_znode_sz: size of znode in bytes ++ * ++ * @leb_overhead: how many bytes are wasted in an LEB when it is filled with ++ * data nodes of maximum size - used in free space reporting ++ * @dead_wm: LEB dead space watermark ++ * @dark_wm: LEB dark space watermark ++ * @block_cnt: count of 4KiB blocks on the FS ++ * ++ * @ranges: UBIFS node length ranges ++ * @ubi: UBI volume descriptor ++ * @di: UBI device information ++ * @vi: UBI volume information ++ * ++ * @orph_tree: rb-tree of orphan inode numbers ++ * @orph_list: list of orphan inode numbers in order added ++ * @orph_new: list of orphan inode numbers added since last commit ++ * @orph_cnext: next orphan to commit ++ * @orph_dnext: next orphan to delete ++ * @orphan_lock: lock for orph_tree and orph_new ++ * @orph_buf: buffer for orphan nodes ++ * @new_orphans: number of orphans since last commit ++ * @cmt_orphans: number 
of orphans being committed ++ * @tot_orphans: number of orphans in the rb_tree ++ * @max_orphans: maximum number of orphans allowed ++ * @ohead_lnum: orphan head LEB number ++ * @ohead_offs: orphan head offset ++ * @no_orphs: non-zero if there are no orphans ++ * ++ * @bgt: UBIFS background thread ++ * @bgt_name: background thread name ++ * @need_bgt: if background thread should run ++ * @need_wbuf_sync: if write-buffers have to be synchronized ++ * ++ * @gc_lnum: LEB number used for garbage collection ++ * @sbuf: a buffer of LEB size used by GC and replay for scanning ++ * @idx_gc: list of index LEBs that have been garbage collected ++ * @idx_gc_cnt: number of elements on the idx_gc list ++ * @gc_seq: incremented for every non-index LEB garbage collected ++ * @gced_lnum: last non-index LEB that was garbage collected ++ * ++ * @infos_list: links all 'ubifs_info' objects ++ * @umount_mutex: serializes shrinker and un-mount ++ * @shrinker_run_no: shrinker run number ++ * ++ * @space_bits: number of bits needed to record free or dirty space ++ * @lpt_lnum_bits: number of bits needed to record a LEB number in the LPT ++ * @lpt_offs_bits: number of bits needed to record an offset in the LPT ++ * @lpt_spc_bits: number of bits needed to space in the LPT ++ * @pcnt_bits: number of bits needed to record pnode or nnode number ++ * @lnum_bits: number of bits needed to record LEB number ++ * @nnode_sz: size of on-flash nnode ++ * @pnode_sz: size of on-flash pnode ++ * @ltab_sz: size of on-flash LPT lprops table ++ * @lsave_sz: size of on-flash LPT save table ++ * @pnode_cnt: number of pnodes ++ * @nnode_cnt: number of nnodes ++ * @lpt_hght: height of the LPT ++ * @pnodes_have: number of pnodes in memory ++ * ++ * @lp_mutex: protects lprops table and all the other lprops-related fields ++ * @lpt_lnum: LEB number of the root nnode of the LPT ++ * @lpt_offs: offset of the root nnode of the LPT ++ * @nhead_lnum: LEB number of LPT head ++ * @nhead_offs: offset of LPT head ++ * 
@lpt_drty_flgs: dirty flags for LPT special nodes e.g. ltab ++ * @dirty_nn_cnt: number of dirty nnodes ++ * @dirty_pn_cnt: number of dirty pnodes ++ * @check_lpt_free: flag that indicates LPT GC may be needed ++ * @lpt_sz: LPT size ++ * @lpt_nod_buf: buffer for an on-flash nnode or pnode ++ * @lpt_buf: buffer of LEB size used by LPT ++ * @nroot: address in memory of the root nnode of the LPT ++ * @lpt_cnext: next LPT node to commit ++ * @lpt_heap: array of heaps of categorized lprops ++ * @dirty_idx: a (reverse sorted) copy of the LPROPS_DIRTY_IDX heap as at ++ * previous commit start ++ * @uncat_list: list of un-categorized LEBs ++ * @empty_list: list of empty LEBs ++ * @freeable_list: list of freeable non-index LEBs (free + dirty == @leb_size) ++ * @frdi_idx_list: list of freeable index LEBs (free + dirty == @leb_size) ++ * @freeable_cnt: number of freeable LEBs in @freeable_list ++ * ++ * @ltab_lnum: LEB number of LPT's own lprops table ++ * @ltab_offs: offset of LPT's own lprops table ++ * @ltab: LPT's own lprops table ++ * @ltab_cmt: LPT's own lprops table (commit copy) ++ * @lsave_cnt: number of LEB numbers in LPT's save table ++ * @lsave_lnum: LEB number of LPT's save table ++ * @lsave_offs: offset of LPT's save table ++ * @lsave: LPT's save table ++ * @lscan_lnum: LEB number of last LPT scan ++ * ++ * @rp_size: size of the reserved pool in bytes ++ * @report_rp_size: size of the reserved pool reported to user-space ++ * @rp_uid: reserved pool user ID ++ * @rp_gid: reserved pool group ID ++ * ++ * @empty: if the UBI device is empty ++ * @replay_tree: temporary tree used during journal replay ++ * @replay_list: temporary list used during journal replay ++ * @replay_buds: list of buds to replay ++ * @cs_sqnum: sequence number of first node in the log (commit start node) ++ * @replay_sqnum: sequence number of node currently being replayed ++ * @need_recovery: file-system needs recovery ++ * @replaying: set to %1 during journal replay ++ * @unclean_leb_list: 
LEBs to recover when mounting ro to rw ++ * @rcvrd_mst_node: recovered master node to write when mounting ro to rw ++ * @size_tree: inode size information for recovery ++ * @remounting_rw: set while remounting from ro to rw (sb flags have MS_RDONLY) ++ * @always_chk_crc: always check CRCs (while mounting and remounting rw) ++ * @mount_opts: UBIFS-specific mount options ++ * ++ * @dbg: debugging-related information ++ */ ++struct ubifs_info { ++ struct super_block *vfs_sb; ++ struct backing_dev_info bdi; ++ ++ ino_t highest_inum; ++ unsigned long long max_sqnum; ++ unsigned long long cmt_no; ++ spinlock_t cnt_lock; ++ int fmt_version; ++ int ro_compat_version; ++ unsigned char uuid[16]; ++ ++ int lhead_lnum; ++ int lhead_offs; ++ int ltail_lnum; ++ struct mutex log_mutex; ++ int min_log_bytes; ++ long long cmt_bud_bytes; ++ ++ struct rb_root buds; ++ long long bud_bytes; ++ spinlock_t buds_lock; ++ int jhead_cnt; ++ struct ubifs_jhead *jheads; ++ long long max_bud_bytes; ++ long long bg_bud_bytes; ++ struct list_head old_buds; ++ int max_bud_cnt; ++ ++ struct rw_semaphore commit_sem; ++ int cmt_state; ++ spinlock_t cs_lock; ++ wait_queue_head_t cmt_wq; ++ ++ unsigned int big_lpt:1; ++ unsigned int no_chk_data_crc:1; ++ unsigned int bulk_read:1; ++ unsigned int default_compr:2; ++ unsigned int rw_incompat:1; ++ ++ struct mutex tnc_mutex; ++ struct ubifs_zbranch zroot; ++ struct ubifs_znode *cnext; ++ struct ubifs_znode *enext; ++ int *gap_lebs; ++ void *cbuf; ++ void *ileb_buf; ++ int ileb_len; ++ int ihead_lnum; ++ int ihead_offs; ++ int *ilebs; ++ int ileb_cnt; ++ int ileb_nxt; ++ struct rb_root old_idx; ++ int *bottom_up_buf; ++ ++ struct ubifs_mst_node *mst_node; ++ int mst_offs; ++ struct mutex mst_mutex; ++ ++ int max_bu_buf_len; ++ struct mutex bu_mutex; ++ struct bu_info bu; ++ ++ int log_lebs; ++ long long log_bytes; ++ int log_last; ++ int lpt_lebs; ++ int lpt_first; ++ int lpt_last; ++ int orph_lebs; ++ int orph_first; ++ int orph_last; ++ int main_lebs; 
++ int main_first; ++ long long main_bytes; ++ ++ uint8_t key_hash_type; ++ uint32_t (*key_hash)(const char *str, int len); ++ int key_fmt; ++ int key_len; ++ int fanout; ++ ++ int min_io_size; ++ int min_io_shift; ++ int leb_size; ++ int half_leb_size; ++ int idx_leb_size; ++ int leb_cnt; ++ int max_leb_cnt; ++ int old_leb_cnt; ++ int ro_media; ++ ++ atomic_long_t dirty_pg_cnt; ++ atomic_long_t dirty_zn_cnt; ++ atomic_long_t clean_zn_cnt; ++ ++ long long budg_idx_growth; ++ long long budg_data_growth; ++ long long budg_dd_growth; ++ long long budg_uncommitted_idx; ++ spinlock_t space_lock; ++ int min_idx_lebs; ++ unsigned long long old_idx_sz; ++ unsigned long long calc_idx_sz; ++ struct ubifs_lp_stats lst; ++ unsigned int nospace:1; ++ unsigned int nospace_rp:1; ++ ++ int page_budget; ++ int inode_budget; ++ int dent_budget; ++ ++ int ref_node_alsz; ++ int mst_node_alsz; ++ int min_idx_node_sz; ++ int max_idx_node_sz; ++ long long max_inode_sz; ++ int max_znode_sz; ++ ++ int leb_overhead; ++ int dead_wm; ++ int dark_wm; ++ int block_cnt; ++ ++ struct ubifs_node_range ranges[UBIFS_NODE_TYPES_CNT]; ++ struct ubi_volume_desc *ubi; ++ struct ubi_device_info di; ++ struct ubi_volume_info vi; ++ ++ struct rb_root orph_tree; ++ struct list_head orph_list; ++ struct list_head orph_new; ++ struct ubifs_orphan *orph_cnext; ++ struct ubifs_orphan *orph_dnext; ++ spinlock_t orphan_lock; ++ void *orph_buf; ++ int new_orphans; ++ int cmt_orphans; ++ int tot_orphans; ++ int max_orphans; ++ int ohead_lnum; ++ int ohead_offs; ++ int no_orphs; ++ ++ struct task_struct *bgt; ++ char bgt_name[sizeof(BGT_NAME_PATTERN) + 9]; ++ int need_bgt; ++ int need_wbuf_sync; ++ ++ int gc_lnum; ++ void *sbuf; ++ struct list_head idx_gc; ++ int idx_gc_cnt; ++ int gc_seq; ++ int gced_lnum; ++ ++ struct list_head infos_list; ++ struct mutex umount_mutex; ++ unsigned int shrinker_run_no; ++ ++ int space_bits; ++ int lpt_lnum_bits; ++ int lpt_offs_bits; ++ int lpt_spc_bits; ++ int pcnt_bits; ++ int 
lnum_bits; ++ int nnode_sz; ++ int pnode_sz; ++ int ltab_sz; ++ int lsave_sz; ++ int pnode_cnt; ++ int nnode_cnt; ++ int lpt_hght; ++ int pnodes_have; ++ ++ struct mutex lp_mutex; ++ int lpt_lnum; ++ int lpt_offs; ++ int nhead_lnum; ++ int nhead_offs; ++ int lpt_drty_flgs; ++ int dirty_nn_cnt; ++ int dirty_pn_cnt; ++ int check_lpt_free; ++ long long lpt_sz; ++ void *lpt_nod_buf; ++ void *lpt_buf; ++ struct ubifs_nnode *nroot; ++ struct ubifs_cnode *lpt_cnext; ++ struct ubifs_lpt_heap lpt_heap[LPROPS_HEAP_CNT]; ++ struct ubifs_lpt_heap dirty_idx; ++ struct list_head uncat_list; ++ struct list_head empty_list; ++ struct list_head freeable_list; ++ struct list_head frdi_idx_list; ++ int freeable_cnt; ++ ++ int ltab_lnum; ++ int ltab_offs; ++ struct ubifs_lpt_lprops *ltab; ++ struct ubifs_lpt_lprops *ltab_cmt; ++ int lsave_cnt; ++ int lsave_lnum; ++ int lsave_offs; ++ int *lsave; ++ int lscan_lnum; ++ ++ long long rp_size; ++ long long report_rp_size; ++ uid_t rp_uid; ++ gid_t rp_gid; ++ ++ /* The below fields are used only during mounting and re-mounting */ ++ int empty; ++ struct rb_root replay_tree; ++ struct list_head replay_list; ++ struct list_head replay_buds; ++ unsigned long long cs_sqnum; ++ unsigned long long replay_sqnum; ++ int need_recovery; ++ int replaying; ++ struct list_head unclean_leb_list; ++ struct ubifs_mst_node *rcvrd_mst_node; ++ struct rb_root size_tree; ++ int remounting_rw; ++ int always_chk_crc; ++ struct ubifs_mount_opts mount_opts; ++ ++#ifdef CONFIG_UBIFS_FS_DEBUG ++ struct ubifs_debug_info *dbg; ++#endif ++}; ++ ++extern struct list_head ubifs_infos; ++extern spinlock_t ubifs_infos_lock; ++extern atomic_long_t ubifs_clean_zn_cnt; ++extern struct kmem_cache *ubifs_inode_slab; ++extern const struct super_operations ubifs_super_operations; ++extern const struct address_space_operations ubifs_file_address_operations; ++extern const struct file_operations ubifs_file_operations; ++extern const struct inode_operations 
ubifs_file_inode_operations; ++extern const struct file_operations ubifs_dir_operations; ++extern const struct inode_operations ubifs_dir_inode_operations; ++extern const struct inode_operations ubifs_symlink_inode_operations; ++extern struct backing_dev_info ubifs_backing_dev_info; ++extern struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT]; ++ ++/* io.c */ ++void ubifs_ro_mode(struct ubifs_info *c, int err); ++int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len); ++int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, ++ int dtype); ++int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf); ++int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len, ++ int lnum, int offs); ++int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len, ++ int lnum, int offs); ++int ubifs_write_node(struct ubifs_info *c, void *node, int len, int lnum, ++ int offs, int dtype); ++int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, ++ int offs, int quiet, int must_chk_crc); ++void ubifs_prepare_node(struct ubifs_info *c, void *buf, int len, int pad); ++void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last); ++int ubifs_io_init(struct ubifs_info *c); ++void ubifs_pad(const struct ubifs_info *c, void *buf, int pad); ++int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf); ++int ubifs_bg_wbufs_sync(struct ubifs_info *c); ++void ubifs_wbuf_add_ino_nolock(struct ubifs_wbuf *wbuf, ino_t inum); ++int ubifs_sync_wbufs_by_inode(struct ubifs_info *c, struct inode *inode); ++ ++/* scan.c */ ++struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, ++ int offs, void *sbuf); ++void ubifs_scan_destroy(struct ubifs_scan_leb *sleb); ++int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, ++ int offs, int quiet); ++struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum, ++ int offs, void 
*sbuf); ++void ubifs_end_scan(const struct ubifs_info *c, struct ubifs_scan_leb *sleb, ++ int lnum, int offs); ++int ubifs_add_snod(const struct ubifs_info *c, struct ubifs_scan_leb *sleb, ++ void *buf, int offs); ++void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs, ++ void *buf); ++ ++/* log.c */ ++void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud); ++void ubifs_create_buds_lists(struct ubifs_info *c); ++int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs); ++struct ubifs_bud *ubifs_search_bud(struct ubifs_info *c, int lnum); ++struct ubifs_wbuf *ubifs_get_wbuf(struct ubifs_info *c, int lnum); ++int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum); ++int ubifs_log_end_commit(struct ubifs_info *c, int new_ltail_lnum); ++int ubifs_log_post_commit(struct ubifs_info *c, int old_ltail_lnum); ++int ubifs_consolidate_log(struct ubifs_info *c); ++ ++/* journal.c */ ++int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, ++ const struct qstr *nm, const struct inode *inode, ++ int deletion, int xent); ++int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, ++ const union ubifs_key *key, const void *buf, int len); ++int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode); ++int ubifs_jnl_delete_inode(struct ubifs_info *c, const struct inode *inode); ++int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, ++ const struct dentry *old_dentry, ++ const struct inode *new_dir, ++ const struct dentry *new_dentry, int sync); ++int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode, ++ loff_t old_size, loff_t new_size); ++int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host, ++ const struct inode *inode, const struct qstr *nm); ++int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode1, ++ const struct inode *inode2); ++ ++/* budget.c */ ++int ubifs_budget_space(struct ubifs_info 
*c, struct ubifs_budget_req *req); ++void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req); ++void ubifs_release_dirty_inode_budget(struct ubifs_info *c, ++ struct ubifs_inode *ui); ++int ubifs_budget_inode_op(struct ubifs_info *c, struct inode *inode, ++ struct ubifs_budget_req *req); ++void ubifs_release_ino_dirty(struct ubifs_info *c, struct inode *inode, ++ struct ubifs_budget_req *req); ++void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode, ++ struct ubifs_budget_req *req); ++long long ubifs_get_free_space(struct ubifs_info *c); ++long long ubifs_get_free_space_nolock(struct ubifs_info *c); ++int ubifs_calc_min_idx_lebs(struct ubifs_info *c); ++void ubifs_convert_page_budget(struct ubifs_info *c); ++long long ubifs_reported_space(const struct ubifs_info *c, long long free); ++long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs); ++ ++/* find.c */ ++int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *offs, ++ int squeeze); ++int ubifs_find_free_leb_for_idx(struct ubifs_info *c); ++int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, ++ int min_space, int pick_free); ++int ubifs_find_dirty_idx_leb(struct ubifs_info *c); ++int ubifs_save_dirty_idx_lnums(struct ubifs_info *c); ++ ++/* tnc.c */ ++int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, ++ struct ubifs_znode **zn, int *n); ++int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, ++ void *node, const struct qstr *nm); ++int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, ++ void *node, int *lnum, int *offs); ++int ubifs_tnc_add(struct ubifs_info *c, const union ubifs_key *key, int lnum, ++ int offs, int len); ++int ubifs_tnc_replace(struct ubifs_info *c, const union ubifs_key *key, ++ int old_lnum, int old_offs, int lnum, int offs, int len); ++int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key, ++ int lnum, int offs, int len, 
const struct qstr *nm); ++int ubifs_tnc_remove(struct ubifs_info *c, const union ubifs_key *key); ++int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key, ++ const struct qstr *nm); ++int ubifs_tnc_remove_range(struct ubifs_info *c, union ubifs_key *from_key, ++ union ubifs_key *to_key); ++int ubifs_tnc_remove_ino(struct ubifs_info *c, ino_t inum); ++struct ubifs_dent_node *ubifs_tnc_next_ent(struct ubifs_info *c, ++ union ubifs_key *key, ++ const struct qstr *nm); ++void ubifs_tnc_close(struct ubifs_info *c); ++int ubifs_tnc_has_node(struct ubifs_info *c, union ubifs_key *key, int level, ++ int lnum, int offs, int is_idx); ++int ubifs_dirty_idx_node(struct ubifs_info *c, union ubifs_key *key, int level, ++ int lnum, int offs); ++/* Shared by tnc.c for tnc_commit.c */ ++void destroy_old_idx(struct ubifs_info *c); ++int is_idx_node_in_tnc(struct ubifs_info *c, union ubifs_key *key, int level, ++ int lnum, int offs); ++int insert_old_idx_znode(struct ubifs_info *c, struct ubifs_znode *znode); ++int ubifs_tnc_get_bu_keys(struct ubifs_info *c, struct bu_info *bu); ++int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu); ++ ++/* tnc_misc.c */ ++struct ubifs_znode *ubifs_tnc_levelorder_next(struct ubifs_znode *zr, ++ struct ubifs_znode *znode); ++int ubifs_search_zbranch(const struct ubifs_info *c, ++ const struct ubifs_znode *znode, ++ const union ubifs_key *key, int *n); ++struct ubifs_znode *ubifs_tnc_postorder_first(struct ubifs_znode *znode); ++struct ubifs_znode *ubifs_tnc_postorder_next(struct ubifs_znode *znode); ++long ubifs_destroy_tnc_subtree(struct ubifs_znode *zr); ++struct ubifs_znode *ubifs_load_znode(struct ubifs_info *c, ++ struct ubifs_zbranch *zbr, ++ struct ubifs_znode *parent, int iip); ++int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr, ++ void *node); ++ ++/* tnc_commit.c */ ++int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot); ++int ubifs_tnc_end_commit(struct 
ubifs_info *c); ++ ++/* shrinker.c */ ++int ubifs_shrinker(int nr_to_scan, gfp_t gfp_mask); ++ ++/* commit.c */ ++int ubifs_bg_thread(void *info); ++void ubifs_commit_required(struct ubifs_info *c); ++void ubifs_request_bg_commit(struct ubifs_info *c); ++int ubifs_run_commit(struct ubifs_info *c); ++void ubifs_recovery_commit(struct ubifs_info *c); ++int ubifs_gc_should_commit(struct ubifs_info *c); ++void ubifs_wait_for_commit(struct ubifs_info *c); ++ ++/* master.c */ ++int ubifs_read_master(struct ubifs_info *c); ++int ubifs_write_master(struct ubifs_info *c); ++ ++/* sb.c */ ++int ubifs_read_superblock(struct ubifs_info *c); ++struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c); ++int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup); ++ ++/* replay.c */ ++int ubifs_validate_entry(struct ubifs_info *c, ++ const struct ubifs_dent_node *dent); ++int ubifs_replay_journal(struct ubifs_info *c); ++ ++/* gc.c */ ++int ubifs_garbage_collect(struct ubifs_info *c, int anyway); ++int ubifs_gc_start_commit(struct ubifs_info *c); ++int ubifs_gc_end_commit(struct ubifs_info *c); ++void ubifs_destroy_idx_gc(struct ubifs_info *c); ++int ubifs_get_idx_gc_leb(struct ubifs_info *c); ++int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp); ++ ++/* orphan.c */ ++int ubifs_add_orphan(struct ubifs_info *c, ino_t inum); ++void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum); ++int ubifs_orphan_start_commit(struct ubifs_info *c); ++int ubifs_orphan_end_commit(struct ubifs_info *c); ++int ubifs_mount_orphans(struct ubifs_info *c, int unclean, int read_only); ++int ubifs_clear_orphans(struct ubifs_info *c); ++ ++/* lpt.c */ ++int ubifs_calc_lpt_geom(struct ubifs_info *c); ++int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, ++ int *lpt_lebs, int *big_lpt); ++int ubifs_lpt_init(struct ubifs_info *c, int rd, int wr); ++struct ubifs_lprops *ubifs_lpt_lookup(struct ubifs_info *c, int lnum); ++struct 
ubifs_lprops *ubifs_lpt_lookup_dirty(struct ubifs_info *c, int lnum); ++int ubifs_lpt_scan_nolock(struct ubifs_info *c, int start_lnum, int end_lnum, ++ ubifs_lpt_scan_callback scan_cb, void *data); ++ ++/* Shared by lpt.c for lpt_commit.c */ ++void ubifs_pack_lsave(struct ubifs_info *c, void *buf, int *lsave); ++void ubifs_pack_ltab(struct ubifs_info *c, void *buf, ++ struct ubifs_lpt_lprops *ltab); ++void ubifs_pack_pnode(struct ubifs_info *c, void *buf, ++ struct ubifs_pnode *pnode); ++void ubifs_pack_nnode(struct ubifs_info *c, void *buf, ++ struct ubifs_nnode *nnode); ++struct ubifs_pnode *ubifs_get_pnode(struct ubifs_info *c, ++ struct ubifs_nnode *parent, int iip); ++struct ubifs_nnode *ubifs_get_nnode(struct ubifs_info *c, ++ struct ubifs_nnode *parent, int iip); ++int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip); ++void ubifs_add_lpt_dirt(struct ubifs_info *c, int lnum, int dirty); ++void ubifs_add_nnode_dirt(struct ubifs_info *c, struct ubifs_nnode *nnode); ++uint32_t ubifs_unpack_bits(uint8_t **addr, int *pos, int nrbits); ++struct ubifs_nnode *ubifs_first_nnode(struct ubifs_info *c, int *hght); ++/* Needed only in debugging code in lpt_commit.c */ ++int ubifs_unpack_nnode(const struct ubifs_info *c, void *buf, ++ struct ubifs_nnode *nnode); ++ ++/* lpt_commit.c */ ++int ubifs_lpt_start_commit(struct ubifs_info *c); ++int ubifs_lpt_end_commit(struct ubifs_info *c); ++int ubifs_lpt_post_commit(struct ubifs_info *c); ++void ubifs_lpt_free(struct ubifs_info *c, int wr_only); ++ ++/* lprops.c */ ++const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c, ++ const struct ubifs_lprops *lp, ++ int free, int dirty, int flags, ++ int idx_gc_cnt); ++void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *lst); ++void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops, ++ int cat); ++void ubifs_replace_cat(struct ubifs_info *c, struct ubifs_lprops *old_lprops, ++ struct ubifs_lprops *new_lprops); 
++void ubifs_ensure_cat(struct ubifs_info *c, struct ubifs_lprops *lprops); ++int ubifs_categorize_lprops(const struct ubifs_info *c, ++ const struct ubifs_lprops *lprops); ++int ubifs_change_one_lp(struct ubifs_info *c, int lnum, int free, int dirty, ++ int flags_set, int flags_clean, int idx_gc_cnt); ++int ubifs_update_one_lp(struct ubifs_info *c, int lnum, int free, int dirty, ++ int flags_set, int flags_clean); ++int ubifs_read_one_lp(struct ubifs_info *c, int lnum, struct ubifs_lprops *lp); ++const struct ubifs_lprops *ubifs_fast_find_free(struct ubifs_info *c); ++const struct ubifs_lprops *ubifs_fast_find_empty(struct ubifs_info *c); ++const struct ubifs_lprops *ubifs_fast_find_freeable(struct ubifs_info *c); ++const struct ubifs_lprops *ubifs_fast_find_frdi_idx(struct ubifs_info *c); ++ ++/* file.c */ ++int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync); ++int ubifs_setattr(struct dentry *dentry, struct iattr *attr); ++ ++/* dir.c */ ++struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir, ++ int mode); ++int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry, ++ struct kstat *stat); ++ ++/* xattr.c */ ++int ubifs_setxattr(struct dentry *dentry, const char *name, ++ const void *value, size_t size, int flags); ++ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf, ++ size_t size); ++ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size); ++int ubifs_removexattr(struct dentry *dentry, const char *name); ++ ++/* super.c */ ++struct inode *ubifs_iget(struct super_block *sb, unsigned long inum); ++ ++/* recovery.c */ ++int ubifs_recover_master_node(struct ubifs_info *c); ++int ubifs_write_rcvrd_mst_node(struct ubifs_info *c); ++struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, ++ int offs, void *sbuf, int grouped); ++struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, ++ int offs, void *sbuf); ++int 
ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf); ++int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf); ++int ubifs_rcvry_gc_commit(struct ubifs_info *c); ++int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key, ++ int deletion, loff_t new_size); ++int ubifs_recover_size(struct ubifs_info *c); ++void ubifs_destroy_size_tree(struct ubifs_info *c); ++ ++/* ioctl.c */ ++long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); ++void ubifs_set_inode_flags(struct inode *inode); ++#ifdef CONFIG_COMPAT ++long ubifs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg); ++#endif ++ ++/* compressor.c */ ++int __init ubifs_compressors_init(void); ++void ubifs_compressors_exit(void); ++void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len, ++ int *compr_type); ++int ubifs_decompress(const void *buf, int len, void *out, int *out_len, ++ int compr_type); ++ ++#include "debug.h" ++#include "misc.h" ++#include "key.h" ++ ++#endif /* !__UBIFS_H__ */ +diff -Nurd linux-2.6.24/fs/ubifs/xattr.c ubifs-v2.6.24/fs/ubifs/xattr.c +--- linux-2.6.24/fs/ubifs/xattr.c 1970-01-01 02:00:00.000000000 +0200 ++++ ubifs-v2.6.24/fs/ubifs/xattr.c 2009-04-07 17:14:47.000000000 +0200 +@@ -0,0 +1,571 @@ ++/* ++ * This file is part of UBIFS. ++ * ++ * Copyright (C) 2006-2008 Nokia Corporation. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published by ++ * the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. 
++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, write to the Free Software Foundation, Inc., 51 ++ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ * ++ * Authors: Artem Bityutskiy (Битюцкий Артём) ++ * Adrian Hunter ++ */ ++ ++/* ++ * This file implements UBIFS extended attributes support. ++ * ++ * Extended attributes are implemented as regular inodes with attached data, ++ * which limits extended attribute size to UBIFS block size (4KiB). Names of ++ * extended attributes are described by extended attribute entries (xentries), ++ * which are almost identical to directory entries, but have different key type. ++ * ++ * In other words, the situation with extended attributes is very similar to ++ * directories. Indeed, any inode (but of course not xattr inodes) may have a ++ * number of associated xentries, just like directory inodes have associated ++ * directory entries. Extended attribute entries store the name of the extended ++ * attribute, the host inode number, and the extended attribute inode number. ++ * Similarly, direntries store the name, the parent and the target inode ++ * numbers. Thus, most of the common UBIFS mechanisms may be re-used for ++ * extended attributes. ++ * ++ * The number of extended attributes is not limited, but there is Linux ++ * limitation on the maximum possible size of the list of all extended ++ * attributes associated with an inode (%XATTR_LIST_MAX), so UBIFS makes sure ++ * the sum of all extended attribute names of the inode does not exceed that ++ * limit. ++ * ++ * Extended attributes are synchronous, which means they are written to the ++ * flash media synchronously and there is no write-back for extended attribute ++ * inodes. The extended attribute values are not stored in compressed form on ++ * the media. ++ * ++ * Since extended attributes are represented by regular inodes, they are cached ++ * in the VFS inode cache. 
The xentries are cached in the LNC cache (see ++ * tnc.c). ++ * ++ * ACL support is not implemented. ++ */ ++ ++#include <linux/xattr.h> ++#include <linux/posix_acl_xattr.h> ++#include "ubifs.h" ++ ++/* ++ * Limit the number of extended attributes per inode so that the total size ++ * (@xattr_size) is guaranteed to fit in an 'unsigned int'. ++ */ ++#define MAX_XATTRS_PER_INODE 65535 ++ ++/* ++ * Extended attribute type constants. ++ * ++ * USER_XATTR: user extended attribute ("user.*") ++ * TRUSTED_XATTR: trusted extended attribute ("trusted.*") ++ * SECURITY_XATTR: security extended attribute ("security.*") ++ */ ++enum { ++ USER_XATTR, ++ TRUSTED_XATTR, ++ SECURITY_XATTR, ++}; ++ ++static struct inode_operations none_inode_operations; ++static struct address_space_operations none_address_operations; ++static struct file_operations none_file_operations; ++ ++/** ++ * create_xattr - create an extended attribute. ++ * @c: UBIFS file-system description object ++ * @host: host inode ++ * @nm: extended attribute name ++ * @value: extended attribute value ++ * @size: size of extended attribute value ++ * ++ * This is a helper function which creates an extended attribute of name @nm ++ * and value @value for inode @host. The host inode is also updated on flash ++ * because the ctime and extended attribute accounting data changes. This ++ * function returns zero in case of success and a negative error code in case ++ * of failure.
++ */ ++static int create_xattr(struct ubifs_info *c, struct inode *host, ++ const struct qstr *nm, const void *value, int size) ++{ ++ int err; ++ struct inode *inode; ++ struct ubifs_inode *ui, *host_ui = ubifs_inode(host); ++ struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, ++ .new_ino_d = ALIGN(size, 8), .dirtied_ino = 1, ++ .dirtied_ino_d = ALIGN(host_ui->data_len, 8) }; ++ ++ if (host_ui->xattr_cnt >= MAX_XATTRS_PER_INODE) ++ return -ENOSPC; ++ /* ++ * Linux limits the maximum size of the extended attribute names list ++ * to %XATTR_LIST_MAX. This means we should not allow creating more ++ * extended attributes if the name list becomes larger. This limitation ++ * is artificial for UBIFS, though. ++ */ ++ if (host_ui->xattr_names + host_ui->xattr_cnt + ++ nm->len + 1 > XATTR_LIST_MAX) ++ return -ENOSPC; ++ ++ err = ubifs_budget_space(c, &req); ++ if (err) ++ return err; ++ ++ inode = ubifs_new_inode(c, host, S_IFREG | S_IRWXUGO); ++ if (IS_ERR(inode)) { ++ err = PTR_ERR(inode); ++ goto out_budg; ++ } ++ ++ /* Re-define all operations to be "nothing" */ ++ inode->i_mapping->a_ops = &none_address_operations; ++ inode->i_op = &none_inode_operations; ++ inode->i_fop = &none_file_operations; ++ ++ inode->i_flags |= S_SYNC | S_NOATIME | S_NOCMTIME | S_NOQUOTA; ++ ui = ubifs_inode(inode); ++ ui->xattr = 1; ++ ui->flags |= UBIFS_XATTR_FL; ++ ui->data = kmalloc(size, GFP_NOFS); ++ if (!ui->data) { ++ err = -ENOMEM; ++ goto out_free; ++ } ++ memcpy(ui->data, value, size); ++ inode->i_size = ui->ui_size = size; ++ ui->data_len = size; ++ ++ mutex_lock(&host_ui->ui_mutex); ++ host->i_ctime = ubifs_current_time(host); ++ host_ui->xattr_cnt += 1; ++ host_ui->xattr_size += CALC_DENT_SIZE(nm->len); ++ host_ui->xattr_size += CALC_XATTR_BYTES(size); ++ host_ui->xattr_names += nm->len; ++ ++ err = ubifs_jnl_update(c, host, nm, inode, 0, 1); ++ if (err) ++ goto out_cancel; ++ mutex_unlock(&host_ui->ui_mutex); ++ ++ ubifs_release_budget(c, &req); ++ 
insert_inode_hash(inode); ++ iput(inode); ++ return 0; ++ ++out_cancel: ++ host_ui->xattr_cnt -= 1; ++ host_ui->xattr_size -= CALC_DENT_SIZE(nm->len); ++ host_ui->xattr_size -= CALC_XATTR_BYTES(size); ++ mutex_unlock(&host_ui->ui_mutex); ++out_free: ++ make_bad_inode(inode); ++ iput(inode); ++out_budg: ++ ubifs_release_budget(c, &req); ++ return err; ++} ++ ++/** ++ * change_xattr - change an extended attribute. ++ * @c: UBIFS file-system description object ++ * @host: host inode ++ * @inode: extended attribute inode ++ * @value: extended attribute value ++ * @size: size of extended attribute value ++ * ++ * This helper function changes the value of extended attribute @inode with new ++ * data from @value. Returns zero in case of success and a negative error code ++ * in case of failure. ++ */ ++static int change_xattr(struct ubifs_info *c, struct inode *host, ++ struct inode *inode, const void *value, int size) ++{ ++ int err; ++ struct ubifs_inode *host_ui = ubifs_inode(host); ++ struct ubifs_inode *ui = ubifs_inode(inode); ++ struct ubifs_budget_req req = { .dirtied_ino = 2, ++ .dirtied_ino_d = ALIGN(size, 8) + ALIGN(host_ui->data_len, 8) }; ++ ++ ubifs_assert(ui->data_len == inode->i_size); ++ err = ubifs_budget_space(c, &req); ++ if (err) ++ return err; ++ ++ kfree(ui->data); ++ ui->data = kmalloc(size, GFP_NOFS); ++ if (!ui->data) { ++ err = -ENOMEM; ++ goto out_free; ++ } ++ memcpy(ui->data, value, size); ++ inode->i_size = ui->ui_size = size; ++ ui->data_len = size; ++ ++ mutex_lock(&host_ui->ui_mutex); ++ host->i_ctime = ubifs_current_time(host); ++ host_ui->xattr_size -= CALC_XATTR_BYTES(ui->data_len); ++ host_ui->xattr_size += CALC_XATTR_BYTES(size); ++ ++ /* ++ * It is important to write the host inode after the xattr inode ++ * because if the host inode gets synchronized (via 'fsync()'), then ++ * the extended attribute inode gets synchronized, because it goes ++ * before the host inode in the write-buffer. 
++ */ ++ err = ubifs_jnl_change_xattr(c, inode, host); ++ if (err) ++ goto out_cancel; ++ mutex_unlock(&host_ui->ui_mutex); ++ ++ ubifs_release_budget(c, &req); ++ return 0; ++ ++out_cancel: ++ host_ui->xattr_size -= CALC_XATTR_BYTES(size); ++ host_ui->xattr_size += CALC_XATTR_BYTES(ui->data_len); ++ mutex_unlock(&host_ui->ui_mutex); ++ make_bad_inode(inode); ++out_free: ++ ubifs_release_budget(c, &req); ++ return err; ++} ++ ++/** ++ * check_namespace - check extended attribute name-space. ++ * @nm: extended attribute name ++ * ++ * This function makes sure the extended attribute name belongs to one of the ++ * supported extended attribute name-spaces. Returns name-space index in case ++ * of success and a negative error code in case of failure. ++ */ ++static int check_namespace(const struct qstr *nm) ++{ ++ int type; ++ ++ if (nm->len > UBIFS_MAX_NLEN) ++ return -ENAMETOOLONG; ++ ++ if (!strncmp(nm->name, XATTR_TRUSTED_PREFIX, ++ XATTR_TRUSTED_PREFIX_LEN)) { ++ if (nm->name[sizeof(XATTR_TRUSTED_PREFIX) - 1] == '\0') ++ return -EINVAL; ++ type = TRUSTED_XATTR; ++ } else if (!strncmp(nm->name, XATTR_USER_PREFIX, ++ XATTR_USER_PREFIX_LEN)) { ++ if (nm->name[XATTR_USER_PREFIX_LEN] == '\0') ++ return -EINVAL; ++ type = USER_XATTR; ++ } else if (!strncmp(nm->name, XATTR_SECURITY_PREFIX, ++ XATTR_SECURITY_PREFIX_LEN)) { ++ if (nm->name[sizeof(XATTR_SECURITY_PREFIX) - 1] == '\0') ++ return -EINVAL; ++ type = SECURITY_XATTR; ++ } else ++ return -EOPNOTSUPP; ++ ++ return type; ++} ++ ++static struct inode *iget_xattr(struct ubifs_info *c, ino_t inum) ++{ ++ struct inode *inode; ++ ++ inode = ubifs_iget(c->vfs_sb, inum); ++ if (IS_ERR(inode)) { ++ ubifs_err("dead extended attribute entry, error %d", ++ (int)PTR_ERR(inode)); ++ return inode; ++ } ++ if (ubifs_inode(inode)->xattr) ++ return inode; ++ ubifs_err("corrupt extended attribute entry"); ++ iput(inode); ++ return ERR_PTR(-EINVAL); ++} ++ ++int ubifs_setxattr(struct dentry *dentry, const char *name, ++ const void 
*value, size_t size, int flags) ++{ ++ struct inode *inode, *host = dentry->d_inode; ++ struct ubifs_info *c = host->i_sb->s_fs_info; ++ struct qstr nm = { .name = name, .len = strlen(name) }; ++ struct ubifs_dent_node *xent; ++ union ubifs_key key; ++ int err, type; ++ ++ dbg_gen("xattr '%s', host ino %lu ('%.*s'), size %zd", name, ++ host->i_ino, dentry->d_name.len, dentry->d_name.name, size); ++ ubifs_assert(mutex_is_locked(&host->i_mutex)); ++ ++ if (size > UBIFS_MAX_INO_DATA) ++ return -ERANGE; ++ ++ type = check_namespace(&nm); ++ if (type < 0) ++ return type; ++ ++ xent = kmalloc(UBIFS_MAX_XENT_NODE_SZ, GFP_NOFS); ++ if (!xent) ++ return -ENOMEM; ++ ++ /* ++ * The extended attribute entries are stored in LNC, so multiple ++ * look-ups do not involve reading the flash. ++ */ ++ xent_key_init(c, &key, host->i_ino, &nm); ++ err = ubifs_tnc_lookup_nm(c, &key, xent, &nm); ++ if (err) { ++ if (err != -ENOENT) ++ goto out_free; ++ ++ if (flags & XATTR_REPLACE) ++ /* We are asked not to create the xattr */ ++ err = -ENODATA; ++ else ++ err = create_xattr(c, host, &nm, value, size); ++ goto out_free; ++ } ++ ++ if (flags & XATTR_CREATE) { ++ /* We are asked not to replace the xattr */ ++ err = -EEXIST; ++ goto out_free; ++ } ++ ++ inode = iget_xattr(c, le64_to_cpu(xent->inum)); ++ if (IS_ERR(inode)) { ++ err = PTR_ERR(inode); ++ goto out_free; ++ } ++ ++ err = change_xattr(c, host, inode, value, size); ++ iput(inode); ++ ++out_free: ++ kfree(xent); ++ return err; ++} ++ ++ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf, ++ size_t size) ++{ ++ struct inode *inode, *host = dentry->d_inode; ++ struct ubifs_info *c = host->i_sb->s_fs_info; ++ struct qstr nm = { .name = name, .len = strlen(name) }; ++ struct ubifs_inode *ui; ++ struct ubifs_dent_node *xent; ++ union ubifs_key key; ++ int err; ++ ++ dbg_gen("xattr '%s', ino %lu ('%.*s'), buf size %zd", name, ++ host->i_ino, dentry->d_name.len, dentry->d_name.name, size); ++ ++ err = 
check_namespace(&nm); ++ if (err < 0) ++ return err; ++ ++ xent = kmalloc(UBIFS_MAX_XENT_NODE_SZ, GFP_NOFS); ++ if (!xent) ++ return -ENOMEM; ++ ++ xent_key_init(c, &key, host->i_ino, &nm); ++ err = ubifs_tnc_lookup_nm(c, &key, xent, &nm); ++ if (err) { ++ if (err == -ENOENT) ++ err = -ENODATA; ++ goto out_unlock; ++ } ++ ++ inode = iget_xattr(c, le64_to_cpu(xent->inum)); ++ if (IS_ERR(inode)) { ++ err = PTR_ERR(inode); ++ goto out_unlock; ++ } ++ ++ ui = ubifs_inode(inode); ++ ubifs_assert(inode->i_size == ui->data_len); ++ ubifs_assert(ubifs_inode(host)->xattr_size > ui->data_len); ++ ++ if (buf) { ++ /* If @buf is %NULL we are supposed to return the length */ ++ if (ui->data_len > size) { ++ dbg_err("buffer size %zd, xattr len %d", ++ size, ui->data_len); ++ err = -ERANGE; ++ goto out_iput; ++ } ++ ++ memcpy(buf, ui->data, ui->data_len); ++ } ++ err = ui->data_len; ++ ++out_iput: ++ iput(inode); ++out_unlock: ++ kfree(xent); ++ return err; ++} ++ ++ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size) ++{ ++ union ubifs_key key; ++ struct inode *host = dentry->d_inode; ++ struct ubifs_info *c = host->i_sb->s_fs_info; ++ struct ubifs_inode *host_ui = ubifs_inode(host); ++ struct ubifs_dent_node *xent, *pxent = NULL; ++ int err, len, written = 0; ++ struct qstr nm = { .name = NULL }; ++ ++ dbg_gen("ino %lu ('%.*s'), buffer size %zd", host->i_ino, ++ dentry->d_name.len, dentry->d_name.name, size); ++ ++ len = host_ui->xattr_names + host_ui->xattr_cnt; ++ if (!buffer) ++ /* ++ * We should return the minimum buffer size which will fit a ++ * null-terminated list of all the extended attribute names. 
++ */ ++ return len; ++ ++ if (len > size) ++ return -ERANGE; ++ ++ lowest_xent_key(c, &key, host->i_ino); ++ while (1) { ++ int type; ++ ++ xent = ubifs_tnc_next_ent(c, &key, &nm); ++ if (IS_ERR(xent)) { ++ err = PTR_ERR(xent); ++ break; ++ } ++ ++ nm.name = xent->name; ++ nm.len = le16_to_cpu(xent->nlen); ++ ++ type = check_namespace(&nm); ++ if (unlikely(type < 0)) { ++ err = type; ++ break; ++ } ++ ++ /* Show trusted namespace only for "power" users */ ++ if (type != TRUSTED_XATTR || capable(CAP_SYS_ADMIN)) { ++ memcpy(buffer + written, nm.name, nm.len + 1); ++ written += nm.len + 1; ++ } ++ ++ kfree(pxent); ++ pxent = xent; ++ key_read(c, &xent->key, &key); ++ } ++ ++ kfree(pxent); ++ if (err != -ENOENT) { ++ ubifs_err("cannot find next direntry, error %d", err); ++ return err; ++ } ++ ++ ubifs_assert(written <= size); ++ return written; ++} ++ ++static int remove_xattr(struct ubifs_info *c, struct inode *host, ++ struct inode *inode, const struct qstr *nm) ++{ ++ int err; ++ struct ubifs_inode *host_ui = ubifs_inode(host); ++ struct ubifs_inode *ui = ubifs_inode(inode); ++ struct ubifs_budget_req req = { .dirtied_ino = 2, .mod_dent = 1, ++ .dirtied_ino_d = ALIGN(host_ui->data_len, 8) }; ++ ++ ubifs_assert(ui->data_len == inode->i_size); ++ ++ err = ubifs_budget_space(c, &req); ++ if (err) ++ return err; ++ ++ mutex_lock(&host_ui->ui_mutex); ++ host->i_ctime = ubifs_current_time(host); ++ host_ui->xattr_cnt -= 1; ++ host_ui->xattr_size -= CALC_DENT_SIZE(nm->len); ++ host_ui->xattr_size -= CALC_XATTR_BYTES(ui->data_len); ++ host_ui->xattr_names -= nm->len; ++ ++ err = ubifs_jnl_delete_xattr(c, host, inode, nm); ++ if (err) ++ goto out_cancel; ++ mutex_unlock(&host_ui->ui_mutex); ++ ++ ubifs_release_budget(c, &req); ++ return 0; ++ ++out_cancel: ++ host_ui->xattr_cnt += 1; ++ host_ui->xattr_size += CALC_DENT_SIZE(nm->len); ++ host_ui->xattr_size += CALC_XATTR_BYTES(ui->data_len); ++ mutex_unlock(&host_ui->ui_mutex); ++ ubifs_release_budget(c, &req); ++ 
make_bad_inode(inode); ++ return err; ++} ++ ++int ubifs_removexattr(struct dentry *dentry, const char *name) ++{ ++ struct inode *inode, *host = dentry->d_inode; ++ struct ubifs_info *c = host->i_sb->s_fs_info; ++ struct qstr nm = { .name = name, .len = strlen(name) }; ++ struct ubifs_dent_node *xent; ++ union ubifs_key key; ++ int err; ++ ++ dbg_gen("xattr '%s', ino %lu ('%.*s')", name, ++ host->i_ino, dentry->d_name.len, dentry->d_name.name); ++ ubifs_assert(mutex_is_locked(&host->i_mutex)); ++ ++ err = check_namespace(&nm); ++ if (err < 0) ++ return err; ++ ++ xent = kmalloc(UBIFS_MAX_XENT_NODE_SZ, GFP_NOFS); ++ if (!xent) ++ return -ENOMEM; ++ ++ xent_key_init(c, &key, host->i_ino, &nm); ++ err = ubifs_tnc_lookup_nm(c, &key, xent, &nm); ++ if (err) { ++ if (err == -ENOENT) ++ err = -ENODATA; ++ goto out_free; ++ } ++ ++ inode = iget_xattr(c, le64_to_cpu(xent->inum)); ++ if (IS_ERR(inode)) { ++ err = PTR_ERR(inode); ++ goto out_free; ++ } ++ ++ ubifs_assert(inode->i_nlink == 1); ++ inode->i_nlink = 0; ++ err = remove_xattr(c, host, inode, &nm); ++ if (err) ++ inode->i_nlink = 1; ++ ++ /* If @i_nlink is 0, 'iput()' will delete the inode */ ++ iput(inode); ++ ++out_free: ++ kfree(xent); ++ return err; ++} +diff -Nurd linux-2.6.24/include/linux/fs.h ubifs-v2.6.24/include/linux/fs.h +--- linux-2.6.24/include/linux/fs.h 2008-01-25 00:58:37.000000000 +0200 ++++ ubifs-v2.6.24/include/linux/fs.h 2009-04-07 17:14:47.000000000 +0200 +@@ -1670,6 +1670,8 @@ + extern int invalidate_inode_pages2(struct address_space *mapping); + extern int invalidate_inode_pages2_range(struct address_space *mapping, + pgoff_t start, pgoff_t end); ++extern void generic_sync_sb_inodes(struct super_block *sb, ++ struct writeback_control *wbc); + extern int write_inode_now(struct inode *, int); + extern int filemap_fdatawrite(struct address_space *); + extern int filemap_flush(struct address_space *); +diff -Nurd linux-2.6.24/include/linux/mtd/ubi.h ubifs-v2.6.24/include/linux/mtd/ubi.h +--- 
linux-2.6.24/include/linux/mtd/ubi.h 2008-01-25 00:58:37.000000000 +0200 ++++ ubifs-v2.6.24/include/linux/mtd/ubi.h 2009-04-07 17:14:47.000000000 +0200 +@@ -26,23 +26,6 @@ + #include <mtd/ubi-user.h> + + /* +- * UBI data type hint constants. +- * +- * UBI_LONGTERM: long-term data +- * UBI_SHORTTERM: short-term data +- * UBI_UNKNOWN: data persistence is unknown +- * +- * These constants are used when data is written to UBI volumes in order to +- * help the UBI wear-leveling unit to find more appropriate physical +- * eraseblocks. +- */ +-enum { +- UBI_LONGTERM = 1, +- UBI_SHORTTERM, +- UBI_UNKNOWN +-}; +- +-/* + * enum ubi_open_mode - UBI volume open mode constants. + * + * UBI_READONLY: read-only mode +@@ -62,13 +45,13 @@ + * @size: how many physical eraseblocks are reserved for this volume + * @used_bytes: how many bytes of data this volume contains + * @used_ebs: how many physical eraseblocks of this volume actually contain any +- * data ++ * data + * @vol_type: volume type (%UBI_DYNAMIC_VOLUME or %UBI_STATIC_VOLUME) + * @corrupted: non-zero if the volume is corrupted (static volumes only) + * @upd_marker: non-zero if the volume has update marker set + * @alignment: volume alignment + * @usable_leb_size: how many bytes are available in logical eraseblocks of +- * this volume ++ * this volume + * @name_len: volume name length + * @name: volume name + * @cdev: UBI volume character device major and minor numbers +@@ -167,7 +150,9 @@ + int len, int dtype); + int ubi_leb_erase(struct ubi_volume_desc *desc, int lnum); + int ubi_leb_unmap(struct ubi_volume_desc *desc, int lnum); ++int ubi_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype); + int ubi_is_mapped(struct ubi_volume_desc *desc, int lnum); ++int ubi_sync(int ubi_num); + + /* + * This function is the same as the 'ubi_leb_read()' function, but it does not +diff -Nurd linux-2.6.24/include/mtd/Kbuild ubifs-v2.6.24/include/mtd/Kbuild +--- linux-2.6.24/include/mtd/Kbuild 2008-01-25 00:58:37.000000000 +0200 
++++ ubifs-v2.6.24/include/mtd/Kbuild 2009-04-07 17:14:47.000000000 +0200 +@@ -3,5 +3,4 @@ + header-y += mtd-abi.h + header-y += mtd-user.h + header-y += nftl-user.h +-header-y += ubi-header.h + header-y += ubi-user.h +diff -Nurd linux-2.6.24/include/mtd/ubi-header.h ubifs-v2.6.24/include/mtd/ubi-header.h +--- linux-2.6.24/include/mtd/ubi-header.h 2008-01-25 00:58:37.000000000 +0200 ++++ ubifs-v2.6.24/include/mtd/ubi-header.h 1970-01-01 02:00:00.000000000 +0200 +@@ -1,331 +0,0 @@ +-/* +- * Copyright (c) International Business Machines Corp., 2006 +- * +- * This program is free software; you can redistribute it and/or modify +- * it under the terms of the GNU General Public License as published by +- * the Free Software Foundation; either version 2 of the License, or +- * (at your option) any later version. +- * +- * This program is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See +- * the GNU General Public License for more details. +- * +- * You should have received a copy of the GNU General Public License +- * along with this program; if not, write to the Free Software +- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +- * +- * Authors: Artem Bityutskiy (Битюцкий Артём) +- * Thomas Gleixner +- * Frank Haverkamp +- * Oliver Lohmann +- * Andreas Arnez +- */ +- +-/* +- * This file defines the layout of UBI headers and all the other UBI on-flash +- * data structures. May be included by user-space. 
+- */ +- +-#ifndef __UBI_HEADER_H__ +-#define __UBI_HEADER_H__ +- +-#include <asm/byteorder.h> +- +-/* The version of UBI images supported by this implementation */ +-#define UBI_VERSION 1 +- +-/* The highest erase counter value supported by this implementation */ +-#define UBI_MAX_ERASECOUNTER 0x7FFFFFFF +- +-/* The initial CRC32 value used when calculating CRC checksums */ +-#define UBI_CRC32_INIT 0xFFFFFFFFU +- +-/* Erase counter header magic number (ASCII "UBI#") */ +-#define UBI_EC_HDR_MAGIC 0x55424923 +-/* Volume identifier header magic number (ASCII "UBI!") */ +-#define UBI_VID_HDR_MAGIC 0x55424921 +- +-/* +- * Volume type constants used in the volume identifier header. +- * +- * @UBI_VID_DYNAMIC: dynamic volume +- * @UBI_VID_STATIC: static volume +- */ +-enum { +- UBI_VID_DYNAMIC = 1, +- UBI_VID_STATIC = 2 +-}; +- +-/* +- * Compatibility constants used by internal volumes. +- * +- * @UBI_COMPAT_DELETE: delete this internal volume before anything is written +- * to the flash +- * @UBI_COMPAT_RO: attach this device in read-only mode +- * @UBI_COMPAT_PRESERVE: preserve this internal volume - do not touch its +- * physical eraseblocks, don't allow the wear-leveling unit to move them +- * @UBI_COMPAT_REJECT: reject this UBI image +- */ +-enum { +- UBI_COMPAT_DELETE = 1, +- UBI_COMPAT_RO = 2, +- UBI_COMPAT_PRESERVE = 4, +- UBI_COMPAT_REJECT = 5 +-}; +- +-/* Sizes of UBI headers */ +-#define UBI_EC_HDR_SIZE sizeof(struct ubi_ec_hdr) +-#define UBI_VID_HDR_SIZE sizeof(struct ubi_vid_hdr) +- +-/* Sizes of UBI headers without the ending CRC */ +-#define UBI_EC_HDR_SIZE_CRC (UBI_EC_HDR_SIZE - sizeof(__be32)) +-#define UBI_VID_HDR_SIZE_CRC (UBI_VID_HDR_SIZE - sizeof(__be32)) +- +-/** +- * struct ubi_ec_hdr - UBI erase counter header. 
+- * @magic: erase counter header magic number (%UBI_EC_HDR_MAGIC) +- * @version: version of UBI implementation which is supposed to accept this +- * UBI image +- * @padding1: reserved for future, zeroes +- * @ec: the erase counter +- * @vid_hdr_offset: where the VID header starts +- * @data_offset: where the user data start +- * @padding2: reserved for future, zeroes +- * @hdr_crc: erase counter header CRC checksum +- * +- * The erase counter header takes 64 bytes and has a plenty of unused space for +- * future usage. The unused fields are zeroed. The @version field is used to +- * indicate the version of UBI implementation which is supposed to be able to +- * work with this UBI image. If @version is greater then the current UBI +- * version, the image is rejected. This may be useful in future if something +- * is changed radically. This field is duplicated in the volume identifier +- * header. +- * +- * The @vid_hdr_offset and @data_offset fields contain the offset of the the +- * volume identifier header and user data, relative to the beginning of the +- * physical eraseblock. These values have to be the same for all physical +- * eraseblocks. +- */ +-struct ubi_ec_hdr { +- __be32 magic; +- __u8 version; +- __u8 padding1[3]; +- __be64 ec; /* Warning: the current limit is 31-bit anyway! */ +- __be32 vid_hdr_offset; +- __be32 data_offset; +- __u8 padding2[36]; +- __be32 hdr_crc; +-} __attribute__ ((packed)); +- +-/** +- * struct ubi_vid_hdr - on-flash UBI volume identifier header. 
+- * @magic: volume identifier header magic number (%UBI_VID_HDR_MAGIC) +- * @version: UBI implementation version which is supposed to accept this UBI +- * image (%UBI_VERSION) +- * @vol_type: volume type (%UBI_VID_DYNAMIC or %UBI_VID_STATIC) +- * @copy_flag: if this logical eraseblock was copied from another physical +- * eraseblock (for wear-leveling reasons) +- * @compat: compatibility of this volume (%0, %UBI_COMPAT_DELETE, +- * %UBI_COMPAT_IGNORE, %UBI_COMPAT_PRESERVE, or %UBI_COMPAT_REJECT) +- * @vol_id: ID of this volume +- * @lnum: logical eraseblock number +- * @leb_ver: version of this logical eraseblock (IMPORTANT: obsolete, to be +- * removed, kept only for not breaking older UBI users) +- * @data_size: how many bytes of data this logical eraseblock contains +- * @used_ebs: total number of used logical eraseblocks in this volume +- * @data_pad: how many bytes at the end of this physical eraseblock are not +- * used +- * @data_crc: CRC checksum of the data stored in this logical eraseblock +- * @padding1: reserved for future, zeroes +- * @sqnum: sequence number +- * @padding2: reserved for future, zeroes +- * @hdr_crc: volume identifier header CRC checksum +- * +- * The @sqnum is the value of the global sequence counter at the time when this +- * VID header was created. The global sequence counter is incremented each time +- * UBI writes a new VID header to the flash, i.e. when it maps a logical +- * eraseblock to a new physical eraseblock. The global sequence counter is an +- * unsigned 64-bit integer and we assume it never overflows. The @sqnum +- * (sequence number) is used to distinguish between older and newer versions of +- * logical eraseblocks. +- * +- * There are 2 situations when there may be more then one physical eraseblock +- * corresponding to the same logical eraseblock, i.e., having the same @vol_id +- * and @lnum values in the volume identifier header. 
Suppose we have a logical +- * eraseblock L and it is mapped to the physical eraseblock P. +- * +- * 1. Because UBI may erase physical eraseblocks asynchronously, the following +- * situation is possible: L is asynchronously erased, so P is scheduled for +- * erasure, then L is written to,i.e. mapped to another physical eraseblock P1, +- * so P1 is written to, then an unclean reboot happens. Result - there are 2 +- * physical eraseblocks P and P1 corresponding to the same logical eraseblock +- * L. But P1 has greater sequence number, so UBI picks P1 when it attaches the +- * flash. +- * +- * 2. From time to time UBI moves logical eraseblocks to other physical +- * eraseblocks for wear-leveling reasons. If, for example, UBI moves L from P +- * to P1, and an unclean reboot happens before P is physically erased, there +- * are two physical eraseblocks P and P1 corresponding to L and UBI has to +- * select one of them when the flash is attached. The @sqnum field says which +- * PEB is the original (obviously P will have lower @sqnum) and the copy. But +- * it is not enough to select the physical eraseblock with the higher sequence +- * number, because the unclean reboot could have happen in the middle of the +- * copying process, so the data in P is corrupted. It is also not enough to +- * just select the physical eraseblock with lower sequence number, because the +- * data there may be old (consider a case if more data was added to P1 after +- * the copying). Moreover, the unclean reboot may happen when the erasure of P +- * was just started, so it result in unstable P, which is "mostly" OK, but +- * still has unstable bits. +- * +- * UBI uses the @copy_flag field to indicate that this logical eraseblock is a +- * copy. UBI also calculates data CRC when the data is moved and stores it at +- * the @data_crc field of the copy (P1). So when UBI needs to pick one physical +- * eraseblock of two (P or P1), the @copy_flag of the newer one (P1) is +- * examined. 
If it is cleared, the situation* is simple and the newer one is +- * picked. If it is set, the data CRC of the copy (P1) is examined. If the CRC +- * checksum is correct, this physical eraseblock is selected (P1). Otherwise +- * the older one (P) is selected. +- * +- * Note, there is an obsolete @leb_ver field which was used instead of @sqnum +- * in the past. But it is not used anymore and we keep it in order to be able +- * to deal with old UBI images. It will be removed at some point. +- * +- * There are 2 sorts of volumes in UBI: user volumes and internal volumes. +- * Internal volumes are not seen from outside and are used for various internal +- * UBI purposes. In this implementation there is only one internal volume - the +- * layout volume. Internal volumes are the main mechanism of UBI extensions. +- * For example, in future one may introduce a journal internal volume. Internal +- * volumes have their own reserved range of IDs. +- * +- * The @compat field is only used for internal volumes and contains the "degree +- * of their compatibility". It is always zero for user volumes. This field +- * provides a mechanism to introduce UBI extensions and to be still compatible +- * with older UBI binaries. For example, if someone introduced a journal in +- * future, he would probably use %UBI_COMPAT_DELETE compatibility for the +- * journal volume. And in this case, older UBI binaries, which know nothing +- * about the journal volume, would just delete this volume and work perfectly +- * fine. This is similar to what Ext2fs does when it is fed by an Ext3fs image +- * - it just ignores the Ext3fs journal. +- * +- * The @data_crc field contains the CRC checksum of the contents of the logical +- * eraseblock if this is a static volume. In case of dynamic volumes, it does +- * not contain the CRC checksum as a rule. 
The only exception is when the +- * data of the physical eraseblock was moved by the wear-leveling unit, then +- * the wear-leveling unit calculates the data CRC and stores it in the +- * @data_crc field. And of course, the @copy_flag is %in this case. +- * +- * The @data_size field is used only for static volumes because UBI has to know +- * how many bytes of data are stored in this eraseblock. For dynamic volumes, +- * this field usually contains zero. The only exception is when the data of the +- * physical eraseblock was moved to another physical eraseblock for +- * wear-leveling reasons. In this case, UBI calculates CRC checksum of the +- * contents and uses both @data_crc and @data_size fields. In this case, the +- * @data_size field contains data size. +- * +- * The @used_ebs field is used only for static volumes and indicates how many +- * eraseblocks the data of the volume takes. For dynamic volumes this field is +- * not used and always contains zero. +- * +- * The @data_pad is calculated when volumes are created using the alignment +- * parameter. So, effectively, the @data_pad field reduces the size of logical +- * eraseblocks of this volume. This is very handy when one uses block-oriented +- * software (say, cramfs) on top of the UBI volume. +- */ +-struct ubi_vid_hdr { +- __be32 magic; +- __u8 version; +- __u8 vol_type; +- __u8 copy_flag; +- __u8 compat; +- __be32 vol_id; +- __be32 lnum; +- __be32 leb_ver; /* obsolete, to be removed, don't use */ +- __be32 data_size; +- __be32 used_ebs; +- __be32 data_pad; +- __be32 data_crc; +- __u8 padding1[4]; +- __be64 sqnum; +- __u8 padding2[12]; +- __be32 hdr_crc; +-} __attribute__ ((packed)); +- +-/* Internal UBI volumes count */ +-#define UBI_INT_VOL_COUNT 1 +- +-/* +- * Starting ID of internal volumes. There is reserved room for 4096 internal +- * volumes. 
+- */ +-#define UBI_INTERNAL_VOL_START (0x7FFFFFFF - 4096) +- +-/* The layout volume contains the volume table */ +- +-#define UBI_LAYOUT_VOL_ID UBI_INTERNAL_VOL_START +-#define UBI_LAYOUT_VOLUME_EBS 2 +-#define UBI_LAYOUT_VOLUME_NAME "layout volume" +-#define UBI_LAYOUT_VOLUME_COMPAT UBI_COMPAT_REJECT +- +-/* The maximum number of volumes per one UBI device */ +-#define UBI_MAX_VOLUMES 128 +- +-/* The maximum volume name length */ +-#define UBI_VOL_NAME_MAX 127 +- +-/* Size of the volume table record */ +-#define UBI_VTBL_RECORD_SIZE sizeof(struct ubi_vtbl_record) +- +-/* Size of the volume table record without the ending CRC */ +-#define UBI_VTBL_RECORD_SIZE_CRC (UBI_VTBL_RECORD_SIZE - sizeof(__be32)) +- +-/** +- * struct ubi_vtbl_record - a record in the volume table. +- * @reserved_pebs: how many physical eraseblocks are reserved for this volume +- * @alignment: volume alignment +- * @data_pad: how many bytes are unused at the end of the each physical +- * eraseblock to satisfy the requested alignment +- * @vol_type: volume type (%UBI_DYNAMIC_VOLUME or %UBI_STATIC_VOLUME) +- * @upd_marker: if volume update was started but not finished +- * @name_len: volume name length +- * @name: the volume name +- * @padding2: reserved, zeroes +- * @crc: a CRC32 checksum of the record +- * +- * The volume table records are stored in the volume table, which is stored in +- * the layout volume. The layout volume consists of 2 logical eraseblock, each +- * of which contains a copy of the volume table (i.e., the volume table is +- * duplicated). The volume table is an array of &struct ubi_vtbl_record +- * objects indexed by the volume ID. +- * +- * If the size of the logical eraseblock is large enough to fit +- * %UBI_MAX_VOLUMES records, the volume table contains %UBI_MAX_VOLUMES +- * records. Otherwise, it contains as many records as it can fit (i.e., size of +- * logical eraseblock divided by sizeof(struct ubi_vtbl_record)). 
+- * +- * The @upd_marker flag is used to implement volume update. It is set to %1 +- * before update and set to %0 after the update. So if the update operation was +- * interrupted, UBI knows that the volume is corrupted. +- * +- * The @alignment field is specified when the volume is created and cannot be +- * later changed. It may be useful, for example, when a block-oriented file +- * system works on top of UBI. The @data_pad field is calculated using the +- * logical eraseblock size and @alignment. The alignment must be multiple to the +- * minimal flash I/O unit. If @alignment is 1, all the available space of +- * the physical eraseblocks is used. +- * +- * Empty records contain all zeroes and the CRC checksum of those zeroes. +- */ +-struct ubi_vtbl_record { +- __be32 reserved_pebs; +- __be32 alignment; +- __be32 data_pad; +- __u8 vol_type; +- __u8 upd_marker; +- __be16 name_len; +- __u8 name[UBI_VOL_NAME_MAX+1]; +- __u8 padding2[24]; +- __be32 crc; +-} __attribute__ ((packed)); +- +-#endif /* !__UBI_HEADER_H__ */ +diff -Nurd linux-2.6.24/include/mtd/ubi-user.h ubifs-v2.6.24/include/mtd/ubi-user.h +--- linux-2.6.24/include/mtd/ubi-user.h 2008-01-25 00:58:37.000000000 +0200 ++++ ubifs-v2.6.24/include/mtd/ubi-user.h 2009-04-07 17:14:47.000000000 +0200 +@@ -22,33 +22,55 @@ + #define __UBI_USER_H__ + + /* ++ * UBI device creation (the same as MTD device attachment) ++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ * ++ * MTD devices may be attached using %UBI_IOCATT ioctl command of the UBI ++ * control device. The caller has to properly fill and pass ++ * &struct ubi_attach_req object - UBI will attach the MTD device specified in ++ * the request and return the newly created UBI device number as the ioctl ++ * return value. 
++ * ++ * UBI device deletion (the same as MTD device detachment) ++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ * ++ * An UBI device maybe deleted with %UBI_IOCDET ioctl command of the UBI ++ * control device. ++ * + * UBI volume creation + * ~~~~~~~~~~~~~~~~~~~ + * +- * UBI volumes are created via the %UBI_IOCMKVOL IOCTL command of UBI character ++ * UBI volumes are created via the %UBI_IOCMKVOL ioctl command of UBI character + * device. A &struct ubi_mkvol_req object has to be properly filled and a +- * pointer to it has to be passed to the IOCTL. ++ * pointer to it has to be passed to the ioctl. + * + * UBI volume deletion + * ~~~~~~~~~~~~~~~~~~~ + * +- * To delete a volume, the %UBI_IOCRMVOL IOCTL command of the UBI character ++ * To delete a volume, the %UBI_IOCRMVOL ioctl command of the UBI character + * device should be used. A pointer to the 32-bit volume ID hast to be passed +- * to the IOCTL. ++ * to the ioctl. + * + * UBI volume re-size + * ~~~~~~~~~~~~~~~~~~ + * +- * To re-size a volume, the %UBI_IOCRSVOL IOCTL command of the UBI character ++ * To re-size a volume, the %UBI_IOCRSVOL ioctl command of the UBI character + * device should be used. A &struct ubi_rsvol_req object has to be properly +- * filled and a pointer to it has to be passed to the IOCTL. ++ * filled and a pointer to it has to be passed to the ioctl. ++ * ++ * UBI volumes re-name ++ * ~~~~~~~~~~~~~~~~~~~ ++ * ++ * To re-name several volumes atomically at one go, the %UBI_IOCRNVOL command ++ * of the UBI character device should be used. A &struct ubi_rnvol_req object ++ * has to be properly filled and a pointer to it has to be passed to the ioctl. + * + * UBI volume update + * ~~~~~~~~~~~~~~~~~ + * +- * Volume update should be done via the %UBI_IOCVOLUP IOCTL command of the ++ * Volume update should be done via the %UBI_IOCVOLUP ioctl command of the + * corresponding UBI volume character device. A pointer to a 64-bit update +- * size should be passed to the IOCTL. 
After then, UBI expects user to write ++ * size should be passed to the ioctl. After this, UBI expects user to write + * this number of bytes to the volume character device. The update is finished + * when the claimed number of bytes is passed. So, the volume update sequence + * is something like: +@@ -57,19 +79,73 @@ + * ioctl(fd, UBI_IOCVOLUP, &image_size); + * write(fd, buf, image_size); + * close(fd); ++ * ++ * Logical eraseblock erase ++ * ~~~~~~~~~~~~~~~~~~~~~~~~ ++ * ++ * To erase a logical eraseblock, the %UBI_IOCEBER ioctl command of the ++ * corresponding UBI volume character device should be used. This command ++ * unmaps the requested logical eraseblock, makes sure the corresponding ++ * physical eraseblock is successfully erased, and returns. ++ * ++ * Atomic logical eraseblock change ++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ * ++ * Atomic logical eraseblock change operation is called using the %UBI_IOCEBCH ++ * ioctl command of the corresponding UBI volume character device. A pointer to ++ * a &struct ubi_leb_change_req object has to be passed to the ioctl. Then the ++ * user is expected to write the requested amount of bytes (similarly to what ++ * should be done in case of the "volume update" ioctl). ++ * ++ * Logical eraseblock map ++ * ~~~~~~~~~~~~~~~~~~~~~ ++ * ++ * To map a logical eraseblock to a physical eraseblock, the %UBI_IOCEBMAP ++ * ioctl command should be used. A pointer to a &struct ubi_map_req object is ++ * expected to be passed. The ioctl maps the requested logical eraseblock to ++ * a physical eraseblock and returns. Only non-mapped logical eraseblocks can ++ * be mapped. If the logical eraseblock specified in the request is already ++ * mapped to a physical eraseblock, the ioctl fails and returns error. ++ * ++ * Logical eraseblock unmap ++ * ~~~~~~~~~~~~~~~~~~~~~~~~ ++ * ++ * To unmap a logical eraseblock to a physical eraseblock, the %UBI_IOCEBUNMAP ++ * ioctl command should be used. 
The ioctl unmaps the logical eraseblocks, ++ * schedules corresponding physical eraseblock for erasure, and returns. Unlike ++ * the "LEB erase" command, it does not wait for the physical eraseblock being ++ * erased. Note, the side effect of this is that if an unclean reboot happens ++ * after the unmap ioctl returns, you may find the LEB mapped again to the same ++ * physical eraseblock after the UBI is run again. ++ * ++ * Check if logical eraseblock is mapped ++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ * ++ * To check if a logical eraseblock is mapped to a physical eraseblock, the ++ * %UBI_IOCEBISMAP ioctl command should be used. It returns %0 if the LEB is ++ * not mapped, and %1 if it is mapped. ++ * ++ * Set an UBI volume property ++ * ~~~~~~~~~~~~~~~~~~~~~~~~~ ++ * ++ * To set an UBI volume property the %UBI_IOCSETPROP ioctl command should be ++ * used. A pointer to a &struct ubi_set_prop_req object is expected to be ++ * passed. The object describes which property should be set, and to which value ++ * it should be set. + */ + + /* +- * When a new volume is created, users may either specify the volume number they +- * want to create or to let UBI automatically assign a volume number using this +- * constant. ++ * When a new UBI volume or UBI device is created, users may either specify the ++ * volume/device number they want to create or to let UBI automatically assign ++ * the number using these constants. 
+ */ + #define UBI_VOL_NUM_AUTO (-1) ++#define UBI_DEV_NUM_AUTO (-1) + + /* Maximum volume name length */ + #define UBI_MAX_VOLUME_NAME 127 + +-/* IOCTL commands of UBI character devices */ ++/* ioctl commands of UBI character devices */ + + #define UBI_IOC_MAGIC 'o' + +@@ -79,15 +155,59 @@ + #define UBI_IOCRMVOL _IOW(UBI_IOC_MAGIC, 1, int32_t) + /* Re-size an UBI volume */ + #define UBI_IOCRSVOL _IOW(UBI_IOC_MAGIC, 2, struct ubi_rsvol_req) ++/* Re-name volumes */ ++#define UBI_IOCRNVOL _IOW(UBI_IOC_MAGIC, 3, struct ubi_rnvol_req) + +-/* IOCTL commands of UBI volume character devices */ ++/* ioctl commands of the UBI control character device */ ++ ++#define UBI_CTRL_IOC_MAGIC 'o' ++ ++/* Attach an MTD device */ ++#define UBI_IOCATT _IOW(UBI_CTRL_IOC_MAGIC, 64, struct ubi_attach_req) ++/* Detach an MTD device */ ++#define UBI_IOCDET _IOW(UBI_CTRL_IOC_MAGIC, 65, int32_t) ++ ++/* ioctl commands of UBI volume character devices */ + + #define UBI_VOL_IOC_MAGIC 'O' + + /* Start UBI volume update */ + #define UBI_IOCVOLUP _IOW(UBI_VOL_IOC_MAGIC, 0, int64_t) +-/* An eraseblock erasure command, used for debugging, disabled by default */ ++/* LEB erasure command, used for debugging, disabled by default */ + #define UBI_IOCEBER _IOW(UBI_VOL_IOC_MAGIC, 1, int32_t) ++/* Atomic LEB change command */ ++#define UBI_IOCEBCH _IOW(UBI_VOL_IOC_MAGIC, 2, int32_t) ++/* Map LEB command */ ++#define UBI_IOCEBMAP _IOW(UBI_VOL_IOC_MAGIC, 3, struct ubi_map_req) ++/* Unmap LEB command */ ++#define UBI_IOCEBUNMAP _IOW(UBI_VOL_IOC_MAGIC, 4, int32_t) ++/* Check if LEB is mapped command */ ++#define UBI_IOCEBISMAP _IOR(UBI_VOL_IOC_MAGIC, 5, int32_t) ++/* Set an UBI volume property */ ++#define UBI_IOCSETPROP _IOW(UBI_VOL_IOC_MAGIC, 6, struct ubi_set_prop_req) ++ ++/* Maximum MTD device name length supported by UBI */ ++#define MAX_UBI_MTD_NAME_LEN 127 ++ ++/* Maximum amount of UBI volumes that can be re-named at one go */ ++#define UBI_MAX_RNVOL 32 ++ ++/* ++ * UBI data type hint constants. 
++ * ++ * UBI_LONGTERM: long-term data ++ * UBI_SHORTTERM: short-term data ++ * UBI_UNKNOWN: data persistence is unknown ++ * ++ * These constants are used when data is written to UBI volumes in order to ++ * help the UBI wear-leveling unit to find more appropriate physical ++ * eraseblocks. ++ */ ++enum { ++ UBI_LONGTERM = 1, ++ UBI_SHORTTERM = 2, ++ UBI_UNKNOWN = 3, ++}; + + /* + * UBI volume type constants. +@@ -97,22 +217,68 @@ + */ + enum { + UBI_DYNAMIC_VOLUME = 3, +- UBI_STATIC_VOLUME = 4 ++ UBI_STATIC_VOLUME = 4, ++}; ++ ++/* ++ * UBI set property ioctl constants ++ * ++ * @UBI_PROP_DIRECT_WRITE: allow / disallow user to directly write and ++ * erase individual eraseblocks on dynamic volumes ++ */ ++enum { ++ UBI_PROP_DIRECT_WRITE = 1, ++}; ++ ++/** ++ * struct ubi_attach_req - attach MTD device request. ++ * @ubi_num: UBI device number to create ++ * @mtd_num: MTD device number to attach ++ * @vid_hdr_offset: VID header offset (use defaults if %0) ++ * @padding: reserved for future, not used, has to be zeroed ++ * ++ * This data structure is used to specify MTD device UBI has to attach and the ++ * parameters it has to use. The number which should be assigned to the new UBI ++ * device is passed in @ubi_num. UBI may automatically assign the number if ++ * @UBI_DEV_NUM_AUTO is passed. In this case, the device number is returned in ++ * @ubi_num. ++ * ++ * Most applications should pass %0 in @vid_hdr_offset to make UBI use default ++ * offset of the VID header within physical eraseblocks. The default offset is ++ * the next min. I/O unit after the EC header. For example, it will be offset ++ * 512 in case of a 512 bytes page NAND flash with no sub-page support. Or ++ * it will be 512 in case of a 2KiB page NAND flash with 4 512-byte sub-pages. ++ * ++ * But in rare cases, if this optimizes things, the VID header may be placed to ++ * a different offset. 
For example, the boot-loader might do things faster if ++ * the VID header sits at the end of the first 2KiB NAND page with 4 sub-pages. ++ * As the boot-loader would not normally need to read EC headers (unless it ++ * needs UBI in RW mode), it might be faster to calculate ECC. This is a weird ++ * example, but it is a real-life example. So, in this example, @vid_hdr_offset would ++ * be 2KiB-64 bytes = 1984. Note, that this position is not even 512-bytes ++ * aligned, which is OK, as UBI is clever enough to realize this is the 4th ++ * sub-page of the first page and add needed padding. ++ */ ++struct ubi_attach_req { ++ int32_t ubi_num; ++ int32_t mtd_num; ++ int32_t vid_hdr_offset; ++ int8_t padding[12]; + }; + + /** + * struct ubi_mkvol_req - volume description data structure used in +- * volume creation requests. ++ * volume creation requests. + * @vol_id: volume number + * @alignment: volume alignment + * @bytes: volume size in bytes + * @vol_type: volume type (%UBI_DYNAMIC_VOLUME or %UBI_STATIC_VOLUME) +- * @padding1: reserved for future, not used ++ * @padding1: reserved for future, not used, has to be zeroed + * @name_len: volume name length +- * @padding2: reserved for future, not used ++ * @padding2: reserved for future, not used, has to be zeroed + * @name: volume name + * +- * This structure is used by userspace programs when creating new volumes. The ++ * This structure is used by user-space programs when creating new volumes. The + * @used_bytes field is only necessary when creating static volumes. + * + * The @alignment field specifies the required alignment of the volume logical +@@ -139,7 +305,7 @@ + int8_t padding1; + int16_t name_len; + int8_t padding2[4]; +- char name[UBI_MAX_VOLUME_NAME+1]; ++ char name[UBI_MAX_VOLUME_NAME + 1]; + } __attribute__ ((packed)); + + /** +@@ -158,4 +324,87 @@ + int32_t vol_id; + } __attribute__ ((packed)); + ++/** ++ * struct ubi_rnvol_req - volumes re-name request.
++ * @count: count of volumes to re-name ++ * @padding1: reserved for future, not used, has to be zeroed ++ * @vol_id: ID of the volume to re-name ++ * @name_len: name length ++ * @padding2: reserved for future, not used, has to be zeroed ++ * @name: new volume name ++ * ++ * UBI allows to re-name up to %32 volumes at one go. The count of volumes to ++ * re-name is specified in the @count field. The ID of the volumes to re-name ++ * and the new names are specified in the @vol_id and @name fields. ++ * ++ * The UBI volume re-name operation is atomic, which means that should power cut ++ * happen, the volumes will have either old name or new name. So one possible ++ * use-case of this command is atomic upgrade. Indeed, to upgrade, say, volumes ++ * A and B one may create temporary volumes %A1 and %B1 with the new contents, ++ * then atomically re-name A1->A and B1->B, in which case old %A and %B will ++ * be removed. ++ * ++ * If it is not desirable to remove old A and B, the re-name request has to ++ * contain 4 entries: A1->A, A->A1, B1->B, B->B1, in which case old A1 and B1 ++ * become A and B, and old A and B will become A1 and B1. ++ * ++ * It is also OK to request: A1->A, A->X, B1->B, B->Y, in which case old A1 ++ * and B1 become A and B, and old A and B become X and Y. ++ * ++ * In other words, in case of re-naming into an existing volume name, the ++ * existing volume is removed, unless it is re-named as well at the same ++ * re-name request. ++ */ ++struct ubi_rnvol_req { ++ int32_t count; ++ int8_t padding1[12]; ++ struct { ++ int32_t vol_id; ++ int16_t name_len; ++ int8_t padding2[2]; ++ char name[UBI_MAX_VOLUME_NAME + 1]; ++ } ents[UBI_MAX_RNVOL]; ++} __attribute__ ((packed)); ++ ++/** ++ * struct ubi_leb_change_req - a data structure used in atomic LEB change ++ * requests.
++ * @lnum: logical eraseblock number to change ++ * @bytes: how many bytes will be written to the logical eraseblock ++ * @dtype: data type (%UBI_LONGTERM, %UBI_SHORTTERM, %UBI_UNKNOWN) ++ * @padding: reserved for future, not used, has to be zeroed ++ */ ++struct ubi_leb_change_req { ++ int32_t lnum; ++ int32_t bytes; ++ int8_t dtype; ++ int8_t padding[7]; ++} __attribute__ ((packed)); ++ ++/** ++ * struct ubi_map_req - a data structure used in map LEB requests. ++ * @lnum: logical eraseblock number to map ++ * @dtype: data type (%UBI_LONGTERM, %UBI_SHORTTERM, %UBI_UNKNOWN) ++ * @padding: reserved for future, not used, has to be zeroed ++ */ ++struct ubi_map_req { ++ int32_t lnum; ++ int8_t dtype; ++ int8_t padding[3]; ++} __attribute__ ((packed)); ++ ++ ++/** ++ * struct ubi_set_prop_req - a data structure used to set an ubi volume ++ * property. ++ * @property: property to set (%UBI_PROP_DIRECT_WRITE) ++ * @padding: reserved for future, not used, has to be zeroed ++ * @value: value to set ++ */ ++struct ubi_set_prop_req { ++ uint8_t property; ++ uint8_t padding[7]; ++ uint64_t value; ++} __attribute__ ((packed)); ++ + #endif /* __UBI_USER_H__ */ +diff -Nurd linux-2.6.24/init/do_mounts.c ubifs-v2.6.24/init/do_mounts.c +--- linux-2.6.24/init/do_mounts.c 2008-01-25 00:58:37.000000000 +0200 ++++ ubifs-v2.6.24/init/do_mounts.c 2009-04-07 17:14:47.000000000 +0200 +@@ -440,7 +440,8 @@ + + if (saved_root_name[0]) { + root_device_name = saved_root_name; +- if (!strncmp(root_device_name, "mtd", 3)) { ++ if (!strncmp(root_device_name, "mtd", 3) || ++ !strncmp(root_device_name, "ubi", 3)) { + mount_block_root(root_device_name, root_mountflags); + goto out; + } diff --git a/recipes/linux/linux_2.6.24.bb b/recipes/linux/linux_2.6.24.bb index eb0330c3c9..c0278bf250 100644 --- a/recipes/linux/linux_2.6.24.bb +++ b/recipes/linux/linux_2.6.24.bb @@ -12,10 +12,11 @@ DEFAULT_PREFERENCE_oxnas = "1" DEFAULT_PREFERENCE_hipox = "1" DEFAULT_PREFERENCE_cs-e9302 = "1" -PR = "r30" +PR = 
"r31" SRC_URI = "${KERNELORG_MIRROR}/pub/linux/kernel/v2.6/linux-2.6.24.tar.bz2 \ file://squashfs-lzma-2.6.24.patch;patch=1 \ + file://ubifs-v2.6.24.patch;patch=1 \ file://time.h.patch;patch=1 \ file://defconfig" |