diff options
Diffstat (limited to 'main/linux-scst/unionfs-2.5.7_for_2.6.36.diff')
-rw-r--r-- | main/linux-scst/unionfs-2.5.7_for_2.6.36.diff | 11253 |
1 files changed, 11253 insertions, 0 deletions
diff --git a/main/linux-scst/unionfs-2.5.7_for_2.6.36.diff b/main/linux-scst/unionfs-2.5.7_for_2.6.36.diff new file mode 100644 index 000000000..fabe75809 --- /dev/null +++ b/main/linux-scst/unionfs-2.5.7_for_2.6.36.diff @@ -0,0 +1,11253 @@ +diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX +index 4303614..5ade4a8 100644 +--- a/Documentation/filesystems/00-INDEX ++++ b/Documentation/filesystems/00-INDEX +@@ -112,6 +112,8 @@ udf.txt + - info and mount options for the UDF filesystem. + ufs.txt + - info on the ufs filesystem. ++unionfs/ ++ - info on the unionfs filesystem + vfat.txt + - info on using the VFAT filesystem used in Windows NT and Windows 95 + vfs.txt +diff --git a/Documentation/filesystems/unionfs/00-INDEX b/Documentation/filesystems/unionfs/00-INDEX +new file mode 100644 +index 0000000..96fdf67 +--- /dev/null ++++ b/Documentation/filesystems/unionfs/00-INDEX +@@ -0,0 +1,10 @@ ++00-INDEX ++ - this file. ++concepts.txt ++ - A brief introduction of concepts. ++issues.txt ++ - A summary of known issues with unionfs. ++rename.txt ++ - Information regarding rename operations. ++usage.txt ++ - Usage information and examples. +diff --git a/Documentation/filesystems/unionfs/concepts.txt b/Documentation/filesystems/unionfs/concepts.txt +new file mode 100644 +index 0000000..b853788 +--- /dev/null ++++ b/Documentation/filesystems/unionfs/concepts.txt +@@ -0,0 +1,287 @@ ++Unionfs 2.x CONCEPTS: ++===================== ++ ++This file describes the concepts needed by a namespace unification file ++system. ++ ++ ++Branch Priority: ++================ ++ ++Each branch is assigned a unique priority - starting from 0 (highest ++priority). No two branches can have the same priority. ++ ++ ++Branch Mode: ++============ ++ ++Each branch is assigned a mode - read-write or read-only. This allows ++directories on media mounted read-write to be used in a read-only manner. ++ ++ ++Whiteouts: ++========== ++ ++A whiteout removes a file name from the namespace. Whiteouts are needed when ++one attempts to remove a file on a read-only branch. ++ ++Suppose we have a two-branch union, where branch 0 is read-write and branch ++1 is read-only. And a file 'foo' on branch 1: ++ ++./b0/ ++./b1/ ++./b1/foo ++ ++The unified view would simply be: ++ ++./union/ ++./union/foo ++ ++Since 'foo' is stored on a read-only branch, it cannot be removed. A ++whiteout is used to remove the name 'foo' from the unified namespace. Again, ++since branch 1 is read-only, the whiteout cannot be created there. So, we ++try on a higher priority (lower numerically) branch and create the whiteout ++there. ++ ++./b0/ ++./b0/.wh.foo ++./b1/ ++./b1/foo ++ ++Later, when Unionfs traverses branches (due to lookup or readdir), it ++eliminate 'foo' from the namespace (as well as the whiteout itself.) ++ ++ ++Opaque Directories: ++=================== ++ ++Assume we have a unionfs mount comprising of two branches. Branch 0 is ++empty; branch 1 has the directory /a and file /a/f. Let's say we mount a ++union of branch 0 as read-write and branch 1 as read-only. Now, let's say ++we try to perform the following operation in the union: ++ ++ rm -fr a ++ ++Because branch 1 is not writable, we cannot physically remove the file /a/f ++or the directory /a. So instead, we will create a whiteout in branch 0 ++named /.wh.a, masking out the name "a" from branch 1. Next, let's say we ++try to create a directory named "a" as follows: ++ ++ mkdir a ++ ++Because we have a whiteout for "a" already, Unionfs behaves as if "a" ++doesn't exist, and thus will delete the whiteout and replace it with an ++actual directory named "a". ++ ++The problem now is that if you try to "ls" in the union, Unionfs will ++perform is normal directory name unification, for *all* directories named ++"a" in all branches. This will cause the file /a/f from branch 1 to ++re-appear in the union's namespace, which violates Unix semantics. ++ ++To avoid this problem, we have a different form of whiteouts for ++directories, called "opaque directories" (same as BSD Union Mount does). ++Whenever we replace a whiteout with a directory, that directory is marked as ++opaque. In Unionfs 2.x, it means that we create a file named ++/a/.wh.__dir_opaque in branch 0, after having created directory /a there. ++When unionfs notices that a directory is opaque, it stops all namespace ++operations (including merging readdir contents) at that opaque directory. ++This prevents re-exposing names from masked out directories. ++ ++ ++Duplicate Elimination: ++====================== ++ ++It is possible for files on different branches to have the same name. ++Unionfs then has to select which instance of the file to show to the user. ++Given the fact that each branch has a priority associated with it, the ++simplest solution is to take the instance from the highest priority ++(numerically lowest value) and "hide" the others. ++ ++ ++Unlinking: ++========= ++ ++Unlink operation on non-directory instances is optimized to remove the ++maximum possible objects in case multiple underlying branches have the same ++file name. The unlink operation will first try to delete file instances ++from highest priority branch and then move further to delete from remaining ++branches in order of their decreasing priority. Consider a case (F..D..F), ++where F is a file and D is a directory of the same name; here, some ++intermediate branch could have an empty directory instance with the same ++name, so this operation also tries to delete this directory instance and ++proceed further to delete from next possible lower priority branch. The ++unionfs unlink operation will smoothly delete the files with same name from ++all possible underlying branches. In case if some error occurs, it creates ++whiteout in highest priority branch that will hide file instance in rest of ++the branches. An error could occur either if an unlink operations in any of ++the underlying branch failed or if a branch has no write permission. ++ ++This unlinking policy is known as "delete all" and it has the benefit of ++overall reducing the number of inodes used by duplicate files, and further ++reducing the total number of inodes consumed by whiteouts. The cost is of ++extra processing, but testing shows this extra processing is well worth the ++savings. ++ ++ ++Copyup: ++======= ++ ++When a change is made to the contents of a file's data or meta-data, they ++have to be stored somewhere. The best way is to create a copy of the ++original file on a branch that is writable, and then redirect the write ++though to this copy. The copy must be made on a higher priority branch so ++that lookup and readdir return this newer "version" of the file rather than ++the original (see duplicate elimination). ++ ++An entire unionfs mount can be read-only or read-write. If it's read-only, ++then none of the branches will be written to, even if some of the branches ++are physically writeable. If the unionfs mount is read-write, then the ++leftmost (highest priority) branch must be writeable (for copyup to take ++place); the remaining branches can be any mix of read-write and read-only. ++ ++In a writeable mount, unionfs will create new files/dir in the leftmost ++branch. If one tries to modify a file in a read-only branch/media, unionfs ++will copyup the file to the leftmost branch and modify it there. If you try ++to modify a file from a writeable branch which is not the leftmost branch, ++then unionfs will modify it in that branch; this is useful if you, say, ++unify differnet packages (e.g., apache, sendmail, ftpd, etc.) and you want ++changes to specific package files to remain logically in the directory where ++they came from. ++ ++Cache Coherency: ++================ ++ ++Unionfs users often want to be able to modify files and directories directly ++on the lower branches, and have those changes be visible at the Unionfs ++level. This means that data (e.g., pages) and meta-data (dentries, inodes, ++open files, etc.) have to be synchronized between the upper and lower ++layers. In other words, the newest changes from a layer below have to be ++propagated to the Unionfs layer above. If the two layers are not in sync, a ++cache incoherency ensues, which could lead to application failures and even ++oopses. The Linux kernel, however, has a rather limited set of mechanisms ++to ensure this inter-layer cache coherency---so Unionfs has to do most of ++the hard work on its own. ++ ++Maintaining Invariants: ++ ++The way Unionfs ensures cache coherency is as follows. At each entry point ++to a Unionfs file system method, we call a utility function to validate the ++primary objects of this method. Generally, we call unionfs_file_revalidate ++on open files, and __unionfs_d_revalidate_chain on dentries (which also ++validates inodes). These utility functions check to see whether the upper ++Unionfs object is in sync with any of the lower objects that it represents. ++The checks we perform include whether the Unionfs superblock has a newer ++generation number, or if any of the lower objects mtime's or ctime's are ++newer. (Note: generation numbers change when branch-management commands are ++issued, so in a way, maintaining cache coherency is also very important for ++branch-management.) If indeed we determine that any Unionfs object is no ++longer in sync with its lower counterparts, then we rebuild that object ++similarly to how we do so for branch-management. ++ ++While rebuilding Unionfs's objects, we also purge any page mappings and ++truncate inode pages (see fs/unionfs/dentry.c:purge_inode_data). This is to ++ensure that Unionfs will re-get the newer data from the lower branches. We ++perform this purging only if the Unionfs operation in question is a reading ++operation; if Unionfs is performing a data writing operation (e.g., ->write, ++->commit_write, etc.) then we do NOT flush the lower mappings/pages: this is ++because (1) a self-deadlock could occur and (2) the upper Unionfs pages are ++considered more authoritative anyway, as they are newer and will overwrite ++any lower pages. ++ ++Unionfs maintains the following important invariant regarding mtime's, ++ctime's, and atime's: the upper inode object's times are the max() of all of ++the lower ones. For non-directory objects, there's only one object below, ++so the mapping is simple; for directory objects, there could me multiple ++lower objects and we have to sync up with the newest one of all the lower ++ones. This invariant is important to maintain, especially for directories ++(besides, we need this to be POSIX compliant). A union could comprise ++multiple writable branches, each of which could change. If we don't reflect ++the newest possible mtime/ctime, some applications could fail. For example, ++NFSv2/v3 exports check for newer directory mtimes on the server to determine ++if the client-side attribute cache should be purged. ++ ++To maintain these important invariants, of course, Unionfs carefully ++synchronizes upper and lower times in various places. For example, if we ++copy-up a file to a top-level branch, the parent directory where the file ++was copied up to will now have a new mtime: so after a successful copy-up, ++we sync up with the new top-level branch's parent directory mtime. ++ ++Implementation: ++ ++This cache-coherency implementation is efficient because it defers any ++synchronizing between the upper and lower layers until absolutely needed. ++Consider the example a common situation where users perform a lot of lower ++changes, such as untarring a whole package. While these take place, ++typically the user doesn't access the files via Unionfs; only after the ++lower changes are done, does the user try to access the lower files. With ++our cache-coherency implementation, the entirety of the changes to the lower ++branches will not result in a single CPU cycle spent at the Unionfs level ++until the user invokes a system call that goes through Unionfs. ++ ++We have considered two alternate cache-coherency designs. (1) Using the ++dentry/inode notify functionality to register interest in finding out about ++any lower changes. This is a somewhat limited and also a heavy-handed ++approach which could result in many notifications to the Unionfs layer upon ++each small change at the lower layer (imagine a file being modified multiple ++times in rapid succession). (2) Rewriting the VFS to support explicit ++callbacks from lower objects to upper objects. We began exploring such an ++implementation, but found it to be very complicated--it would have resulted ++in massive VFS/MM changes which are unlikely to be accepted by the LKML ++community. We therefore believe that our current cache-coherency design and ++implementation represent the best approach at this time. ++ ++Limitations: ++ ++Our implementation works in that as long as a user process will have caused ++Unionfs to be called, directly or indirectly, even to just do ++->d_revalidate; then we will have purged the current Unionfs data and the ++process will see the new data. For example, a process that continually ++re-reads the same file's data will see the NEW data as soon as the lower ++file had changed, upon the next read(2) syscall (even if the file is still ++open!) However, this doesn't work when the process re-reads the open file's ++data via mmap(2) (unless the user unmaps/closes the file and remaps/reopens ++it). Once we respond to ->readpage(s), then the kernel maps the page into ++the process's address space and there doesn't appear to be a way to force ++the kernel to invalidate those pages/mappings, and force the process to ++re-issue ->readpage. If there's a way to invalidate active mappings and ++force a ->readpage, let us know please (invalidate_inode_pages2 doesn't do ++the trick). ++ ++Our current Unionfs code has to perform many file-revalidation calls. It ++would be really nice if the VFS would export an optional file system hook ++->file_revalidate (similarly to dentry->d_revalidate) that will be called ++before each VFS op that has a "struct file" in it. ++ ++Certain file systems have micro-second granularity (or better) for inode ++times, and asynchronous actions could cause those times to change with some ++small delay. In such cases, Unionfs may see a changed inode time that only ++differs by a tiny fraction of a second: such a change may be a false ++positive indication that the lower object has changed, whereas if unionfs ++waits a little longer, that false indication will not be seen. (These false ++positives are harmless, because they would at most cause unionfs to ++re-validate an object that may need no revalidation, and print a debugging ++message that clutters the console/logs.) Therefore, to minimize the chances ++of these situations, we delay the detection of changed times by a small ++factor of a few seconds, called UNIONFS_MIN_CC_TIME (which defaults to 3 ++seconds, as does NFS). This means that we will detect the change, only a ++couple of seconds later, if indeed the time change persists in the lower ++file object. This delayed detection has an added performance benefit: we ++reduce the number of times that unionfs has to revalidate objects, in case ++there's a lot of concurrent activity on both the upper and lower objects, ++for the same file(s). Lastly, this delayed time attribute detection is ++similar to how NFS clients operate (e.g., acregmin). ++ ++Finally, there is no way currently in Linux to prevent lower directories ++from being moved around (i.e., topology changes); there's no way to prevent ++modifications to directory sub-trees of whole file systems which are mounted ++read-write. It is therefore possible for in-flight operations in unionfs to ++take place, while a lower directory is being moved around. Therefore, if ++you try to, say, create a new file in a directory through unionfs, while the ++directory is being moved around directly, then the new file may get created ++in the new location where that directory was moved to. This is a somewhat ++similar behaviour in NFS: an NFS client could be creating a new file while ++th NFS server is moving th directory around; the file will get successfully ++created in the new location. (The one exception in unionfs is that if the ++branch is marked read-only by unionfs, then a copyup will take place.) ++ ++For more information, see <http://unionfs.filesystems.org/>. +diff --git a/Documentation/filesystems/unionfs/issues.txt b/Documentation/filesystems/unionfs/issues.txt +new file mode 100644 +index 0000000..f4b7e7e +--- /dev/null ++++ b/Documentation/filesystems/unionfs/issues.txt +@@ -0,0 +1,28 @@ ++KNOWN Unionfs 2.x ISSUES: ++========================= ++ ++1. Unionfs should not use lookup_one_len() on the underlying f/s as it ++ confuses NFSv4. Currently, unionfs_lookup() passes lookup intents to the ++ lower file-system, this eliminates part of the problem. The remaining ++ calls to lookup_one_len may need to be changed to pass an intent. We are ++ currently introducing VFS changes to fs/namei.c's do_path_lookup() to ++ allow proper file lookup and opening in stackable file systems. ++ ++2. Lockdep (a debugging feature) isn't aware of stacking, and so it ++ incorrectly complains about locking problems. The problem boils down to ++ this: Lockdep considers all objects of a certain type to be in the same ++ class, for example, all inodes. Lockdep doesn't like to see a lock held ++ on two inodes within the same task, and warns that it could lead to a ++ deadlock. However, stackable file systems do precisely that: they lock ++ an upper object, and then a lower object, in a strict order to avoid ++ locking problems; in addition, Unionfs, as a fan-out file system, may ++ have to lock several lower inodes. We are currently looking into Lockdep ++ to see how to make it aware of stackable file systems. For now, we ++ temporarily disable lockdep when calling vfs methods on lower objects, ++ but only for those places where lockdep complained. While this solution ++ may seem unclean, it is not without precedent: other places in the kernel ++ also do similar temporary disabling, of course after carefully having ++ checked that it is the right thing to do. Anyway, you get any warnings ++ from Lockdep, please report them to the Unionfs maintainers. ++ ++For more information, see <http://unionfs.filesystems.org/>. +diff --git a/Documentation/filesystems/unionfs/rename.txt b/Documentation/filesystems/unionfs/rename.txt +new file mode 100644 +index 0000000..e20bb82 +--- /dev/null ++++ b/Documentation/filesystems/unionfs/rename.txt +@@ -0,0 +1,31 @@ ++Rename is a complex beast. The following table shows which rename(2) operations ++should succeed and which should fail. ++ ++o: success ++E: error (either unionfs or vfs) ++X: EXDEV ++ ++none = file does not exist ++file = file is a file ++dir = file is a empty directory ++child= file is a non-empty directory ++wh = file is a directory containing only whiteouts; this makes it logically ++ empty ++ ++ none file dir child wh ++file o o E E E ++dir o E o E o ++child X E X E X ++wh o E o E o ++ ++ ++Renaming directories: ++===================== ++ ++Whenever a empty (either physically or logically) directory is being renamed, ++the following sequence of events should take place: ++ ++1) Remove whiteouts from both source and destination directory ++2) Rename source to destination ++3) Make destination opaque to prevent anything under it from showing up ++ +diff --git a/Documentation/filesystems/unionfs/usage.txt b/Documentation/filesystems/unionfs/usage.txt +new file mode 100644 +index 0000000..1adde69 +--- /dev/null ++++ b/Documentation/filesystems/unionfs/usage.txt +@@ -0,0 +1,134 @@ ++Unionfs is a stackable unification file system, which can appear to merge ++the contents of several directories (branches), while keeping their physical ++content separate. Unionfs is useful for unified source tree management, ++merged contents of split CD-ROM, merged separate software package ++directories, data grids, and more. Unionfs allows any mix of read-only and ++read-write branches, as well as insertion and deletion of branches anywhere ++in the fan-out. To maintain Unix semantics, Unionfs handles elimination of ++duplicates, partial-error conditions, and more. ++ ++GENERAL SYNTAX ++============== ++ ++# mount -t unionfs -o <OPTIONS>,<BRANCH-OPTIONS> none MOUNTPOINT ++ ++OPTIONS can be any legal combination of: ++ ++- ro # mount file system read-only ++- rw # mount file system read-write ++- remount # remount the file system (see Branch Management below) ++- incgen # increment generation no. (see Cache Consistency below) ++ ++BRANCH-OPTIONS can be either (1) a list of branches given to the "dirs=" ++option, or (2) a list of individual branch manipulation commands, combined ++with the "remount" option, and is further described in the "Branch ++Management" section below. ++ ++The syntax for the "dirs=" mount option is: ++ ++ dirs=branch[=ro|=rw][:...] ++ ++The "dirs=" option takes a colon-delimited list of directories to compose ++the union, with an optional branch mode for each of those directories. ++Directories that come earlier (specified first, on the left) in the list ++have a higher precedence than those which come later. Additionally, ++read-only or read-write permissions of the branch can be specified by ++appending =ro or =rw (default) to each directory. See the Copyup section in ++concepts.txt, for a description of Unionfs's behavior when mixing read-only ++and read-write branches and mounts. ++ ++Syntax: ++ ++ dirs=/branch1[=ro|=rw]:/branch2[=ro|=rw]:...:/branchN[=ro|=rw] ++ ++Example: ++ ++ dirs=/writable_branch=rw:/read-only_branch=ro ++ ++ ++BRANCH MANAGEMENT ++================= ++ ++Once you mount your union for the first time, using the "dirs=" option, you ++can then change the union's overall mode or reconfigure the branches, using ++the remount option, as follows. ++ ++To downgrade a union from read-write to read-only: ++ ++# mount -t unionfs -o remount,ro none MOUNTPOINT ++ ++To upgrade a union from read-only to read-write: ++ ++# mount -t unionfs -o remount,rw none MOUNTPOINT ++ ++To delete a branch /foo, regardless where it is in the current union: ++ ++# mount -t unionfs -o remount,del=/foo none MOUNTPOINT ++ ++To insert (add) a branch /foo before /bar: ++ ++# mount -t unionfs -o remount,add=/bar:/foo none MOUNTPOINT ++ ++To insert (add) a branch /foo (with the "rw" mode flag) before /bar: ++ ++# mount -t unionfs -o remount,add=/bar:/foo=rw none MOUNTPOINT ++ ++To insert (add) a branch /foo (in "rw" mode) at the very beginning (i.e., a ++new highest-priority branch), you can use the above syntax, or use a short ++hand version as follows: ++ ++# mount -t unionfs -o remount,add=/foo none MOUNTPOINT ++ ++To append a branch to the very end (new lowest-priority branch): ++ ++# mount -t unionfs -o remount,add=:/foo none MOUNTPOINT ++ ++To append a branch to the very end (new lowest-priority branch), in ++read-only mode: ++ ++# mount -t unionfs -o remount,add=:/foo=ro none MOUNTPOINT ++ ++Finally, to change the mode of one existing branch, say /foo, from read-only ++to read-write, and change /bar from read-write to read-only: ++ ++# mount -t unionfs -o remount,mode=/foo=rw,mode=/bar=ro none MOUNTPOINT ++ ++Note: in Unionfs 2.x, you cannot set the leftmost branch to readonly because ++then Unionfs won't have any writable place for copyups to take place. ++Moreover, the VFS can get confused when it tries to modify something in a ++file system mounted read-write, but isn't permitted to write to it. ++Instead, you should set the whole union as readonly, as described above. ++If, however, you must set the leftmost branch as readonly, perhaps so you ++can get a snapshot of it at a point in time, then you should insert a new ++writable top-level branch, and mark the one you want as readonly. This can ++be accomplished as follows, assuming that /foo is your current leftmost ++branch: ++ ++# mount -t tmpfs -o size=NNN /new ++# mount -t unionfs -o remount,add=/new,mode=/foo=ro none MOUNTPOINT ++<do what you want safely in /foo> ++# mount -t unionfs -o remount,del=/new,mode=/foo=rw none MOUNTPOINT ++<check if there's anything in /new you want to preserve> ++# umount /new ++ ++CACHE CONSISTENCY ++================= ++ ++If you modify any file on any of the lower branches directly, while there is ++a Unionfs 2.x mounted above any of those branches, you should tell Unionfs ++to purge its caches and re-get the objects. To do that, you have to ++increment the generation number of the superblock using the following ++command: ++ ++# mount -t unionfs -o remount,incgen none MOUNTPOINT ++ ++Note that the older way of incrementing the generation number using an ++ioctl, is no longer supported in Unionfs 2.0 and newer. Ioctls in general ++are not encouraged. Plus, an ioctl is per-file concept, whereas the ++generation number is a per-file-system concept. Worse, such an ioctl ++requires an open file, which then has to be invalidated by the very nature ++of the generation number increase (read: the old generation increase ioctl ++was pretty racy). ++ ++ ++For more information, see <http://unionfs.filesystems.org/>. +diff --git a/MAINTAINERS b/MAINTAINERS +index f2a2b8e..11d7f45 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -5917,6 +5917,14 @@ F: Documentation/cdrom/ + F: drivers/cdrom/cdrom.c + F: include/linux/cdrom.h + ++UNIONFS ++P: Erez Zadok ++M: ezk@cs.sunysb.edu ++L: unionfs@filesystems.org ++W: http://unionfs.filesystems.org/ ++T: git git.kernel.org/pub/scm/linux/kernel/git/ezk/unionfs.git ++S: Maintained ++ + UNSORTED BLOCK IMAGES (UBI) + M: Artem Bityutskiy <dedekind1@gmail.com> + W: http://www.linux-mtd.infradead.org/ +diff --git a/fs/Kconfig b/fs/Kconfig +index 3d18530..65b6aa1 100644 +--- a/fs/Kconfig ++++ b/fs/Kconfig +@@ -169,6 +169,7 @@ if MISC_FILESYSTEMS + source "fs/adfs/Kconfig" + source "fs/affs/Kconfig" + source "fs/ecryptfs/Kconfig" ++source "fs/unionfs/Kconfig" + source "fs/hfs/Kconfig" + source "fs/hfsplus/Kconfig" + source "fs/befs/Kconfig" +diff --git a/fs/Makefile b/fs/Makefile +index e6ec1d3..787332e 100644 +--- a/fs/Makefile ++++ b/fs/Makefile +@@ -84,6 +84,7 @@ obj-$(CONFIG_ISO9660_FS) += isofs/ + obj-$(CONFIG_HFSPLUS_FS) += hfsplus/ # Before hfs to find wrapped HFS+ + obj-$(CONFIG_HFS_FS) += hfs/ + obj-$(CONFIG_ECRYPT_FS) += ecryptfs/ ++obj-$(CONFIG_UNION_FS) += unionfs/ + obj-$(CONFIG_VXFS_FS) += freevxfs/ + obj-$(CONFIG_NFS_FS) += nfs/ + obj-$(CONFIG_EXPORTFS) += exportfs/ +diff --git a/fs/namei.c b/fs/namei.c +index 24896e8..db22420 100644 +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -385,6 +385,7 @@ void release_open_intent(struct nameidata *nd) + else + fput(nd->intent.open.file); + } ++EXPORT_SYMBOL_GPL(release_open_intent); + + static inline struct dentry * + do_revalidate(struct dentry *dentry, struct nameidata *nd) +diff --git a/fs/splice.c b/fs/splice.c +index 8f1dfae..7a57fab 100644 +--- a/fs/splice.c ++++ b/fs/splice.c +@@ -1092,8 +1092,8 @@ EXPORT_SYMBOL(generic_splice_sendpage); + /* + * Attempt to initiate a splice from pipe to file. + */ +-static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, +- loff_t *ppos, size_t len, unsigned int flags) ++long vfs_splice_from(struct pipe_inode_info *pipe, struct file *out, ++ loff_t *ppos, size_t len, unsigned int flags) + { + ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, + loff_t *, size_t, unsigned int); +@@ -1116,13 +1116,14 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, + + return splice_write(pipe, out, ppos, len, flags); + } ++EXPORT_SYMBOL_GPL(vfs_splice_from); + + /* + * Attempt to initiate a splice from a file to a pipe. + */ +-static long do_splice_to(struct file *in, loff_t *ppos, +- struct pipe_inode_info *pipe, size_t len, +- unsigned int flags) ++long vfs_splice_to(struct file *in, loff_t *ppos, ++ struct pipe_inode_info *pipe, size_t len, ++ unsigned int flags) + { + ssize_t (*splice_read)(struct file *, loff_t *, + struct pipe_inode_info *, size_t, unsigned int); +@@ -1142,6 +1143,7 @@ static long do_splice_to(struct file *in, loff_t *ppos, + + return splice_read(in, ppos, pipe, len, flags); + } ++EXPORT_SYMBOL_GPL(vfs_splice_to); + + /** + * splice_direct_to_actor - splices data directly between two non-pipes +@@ -1211,7 +1213,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, + size_t read_len; + loff_t pos = sd->pos, prev_pos = pos; + +- ret = do_splice_to(in, &pos, pipe, len, flags); ++ ret = vfs_splice_to(in, &pos, pipe, len, flags); + if (unlikely(ret <= 0)) + goto out_release; + +@@ -1270,8 +1272,8 @@ static int direct_splice_actor(struct pipe_inode_info *pipe, + { + struct file *file = sd->u.file; + +- return do_splice_from(pipe, file, &file->f_pos, sd->total_len, +- sd->flags); ++ return vfs_splice_from(pipe, file, &file->f_pos, sd->total_len, ++ sd->flags); + } + + /** +@@ -1368,7 +1370,7 @@ static long do_splice(struct file *in, loff_t __user *off_in, + } else + off = &out->f_pos; + +- ret = do_splice_from(ipipe, out, off, len, flags); ++ ret = vfs_splice_from(ipipe, out, off, len, flags); + + if (off_out && copy_to_user(off_out, off, sizeof(loff_t))) + ret = -EFAULT; +@@ -1388,7 +1390,7 @@ static long do_splice(struct file *in, loff_t __user *off_in, + } else + off = &in->f_pos; + +- ret = do_splice_to(in, off, opipe, len, flags); ++ ret = vfs_splice_to(in, off, opipe, len, flags); + + if (off_in && copy_to_user(off_in, off, sizeof(loff_t))) + ret = -EFAULT; +diff --git a/fs/stack.c b/fs/stack.c +index 4a6f7f4..7eeef12 100644 +--- a/fs/stack.c ++++ b/fs/stack.c +@@ -1,8 +1,20 @@ ++/* ++ * Copyright (c) 2006-2009 Erez Zadok ++ * Copyright (c) 2006-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2006-2009 Stony Brook University ++ * Copyright (c) 2006-2009 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ + #include <linux/module.h> + #include <linux/fs.h> + #include <linux/fs_stack.h> + +-/* does _NOT_ require i_mutex to be held. ++/* ++ * does _NOT_ require i_mutex to be held. + * + * This function cannot be inlined since i_size_{read,write} is rather + * heavy-weight on 32-bit systems +diff --git a/fs/unionfs/Kconfig b/fs/unionfs/Kconfig +new file mode 100644 +index 0000000..f3c1ac4 +--- /dev/null ++++ b/fs/unionfs/Kconfig +@@ -0,0 +1,24 @@ ++config UNION_FS ++ tristate "Union file system (EXPERIMENTAL)" ++ depends on EXPERIMENTAL ++ help ++ Unionfs is a stackable unification file system, which appears to ++ merge the contents of several directories (branches), while keeping ++ their physical content separate. ++ ++ See <http://unionfs.filesystems.org> for details ++ ++config UNION_FS_XATTR ++ bool "Unionfs extended attributes" ++ depends on UNION_FS ++ help ++ Extended attributes are name:value pairs associated with inodes by ++ the kernel or by users (see the attr(5) manual page). ++ ++ If unsure, say N. ++ ++config UNION_FS_DEBUG ++ bool "Debug Unionfs" ++ depends on UNION_FS ++ help ++ If you say Y here, you can turn on debugging output from Unionfs. +diff --git a/fs/unionfs/Makefile b/fs/unionfs/Makefile +new file mode 100644 +index 0000000..86c32ba +--- /dev/null ++++ b/fs/unionfs/Makefile +@@ -0,0 +1,17 @@ ++UNIONFS_VERSION="2.5.7 (for 2.6.36)" ++ ++EXTRA_CFLAGS += -DUNIONFS_VERSION=\"$(UNIONFS_VERSION)\" ++ ++obj-$(CONFIG_UNION_FS) += unionfs.o ++ ++unionfs-y := subr.o dentry.o file.o inode.o main.o super.o \ ++ rdstate.o copyup.o dirhelper.o rename.o unlink.o \ ++ lookup.o commonfops.o dirfops.o sioq.o mmap.o whiteout.o ++ ++unionfs-$(CONFIG_UNION_FS_XATTR) += xattr.o ++ ++unionfs-$(CONFIG_UNION_FS_DEBUG) += debug.o ++ ++ifeq ($(CONFIG_UNION_FS_DEBUG),y) ++EXTRA_CFLAGS += -DDEBUG ++endif +diff --git a/fs/unionfs/commonfops.c b/fs/unionfs/commonfops.c +new file mode 100644 +index 0000000..51ea65e +--- /dev/null ++++ b/fs/unionfs/commonfops.c +@@ -0,0 +1,896 @@ ++/* ++ * Copyright (c) 2003-2010 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2010 Stony Brook University ++ * Copyright (c) 2003-2010 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++ ++/* ++ * 1) Copyup the file ++ * 2) Rename the file to '.unionfs<original inode#><counter>' - obviously ++ * stolen from NFS's silly rename ++ */ ++static int copyup_deleted_file(struct file *file, struct dentry *dentry, ++ struct dentry *parent, int bstart, int bindex) ++{ ++ static unsigned int counter; ++ const int i_inosize = sizeof(dentry->d_inode->i_ino) * 2; ++ const int countersize = sizeof(counter) * 2; ++ const int nlen = sizeof(".unionfs") + i_inosize + countersize - 1; ++ char name[nlen + 1]; ++ int err; ++ struct dentry *tmp_dentry = NULL; ++ struct dentry *lower_dentry; ++ struct dentry *lower_dir_dentry = NULL; ++ ++ lower_dentry = unionfs_lower_dentry_idx(dentry, bstart); ++ ++ sprintf(name, ".unionfs%*.*lx", ++ i_inosize, i_inosize, lower_dentry->d_inode->i_ino); ++ ++ /* ++ * Loop, looking for an unused temp name to copyup to. ++ * ++ * It's somewhat silly that we look for a free temp tmp name in the ++ * source branch (bstart) instead of the dest branch (bindex), where ++ * the final name will be created. We _will_ catch it if somehow ++ * the name exists in the dest branch, but it'd be nice to catch it ++ * sooner than later. ++ */ ++retry: ++ tmp_dentry = NULL; ++ do { ++ char *suffix = name + nlen - countersize; ++ ++ dput(tmp_dentry); ++ counter++; ++ sprintf(suffix, "%*.*x", countersize, countersize, counter); ++ ++ pr_debug("unionfs: trying to rename %s to %s\n", ++ dentry->d_name.name, name); ++ ++ tmp_dentry = lookup_lck_len(name, lower_dentry->d_parent, ++ nlen); ++ if (IS_ERR(tmp_dentry)) { ++ err = PTR_ERR(tmp_dentry); ++ goto out; ++ } ++ } while (tmp_dentry->d_inode != NULL); /* need negative dentry */ ++ dput(tmp_dentry); ++ ++ err = copyup_named_file(parent->d_inode, file, name, bstart, bindex, ++ i_size_read(file->f_path.dentry->d_inode)); ++ if (err) { ++ if (unlikely(err == -EEXIST)) ++ goto retry; ++ goto out; ++ } ++ ++ /* bring it to the same state as an unlinked file */ ++ lower_dentry = unionfs_lower_dentry_idx(dentry, dbstart(dentry)); ++ if (!unionfs_lower_inode_idx(dentry->d_inode, bindex)) { ++ atomic_inc(&lower_dentry->d_inode->i_count); ++ unionfs_set_lower_inode_idx(dentry->d_inode, bindex, ++ lower_dentry->d_inode); ++ } ++ lower_dir_dentry = lock_parent(lower_dentry); ++ err = vfs_unlink(lower_dir_dentry->d_inode, lower_dentry); ++ unlock_dir(lower_dir_dentry); ++ ++out: ++ if (!err) ++ unionfs_check_dentry(dentry); ++ return err; ++} ++ ++/* ++ * put all references held by upper struct file and free lower file pointer ++ * array ++ */ ++static void cleanup_file(struct file *file) ++{ ++ int bindex, bstart, bend; ++ struct file **lower_files; ++ struct file *lower_file; ++ struct super_block *sb = file->f_path.dentry->d_sb; ++ ++ lower_files = UNIONFS_F(file)->lower_files; ++ bstart = fbstart(file); ++ bend = fbend(file); ++ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ int i; /* holds (possibly) updated branch index */ ++ int old_bid; ++ ++ lower_file = unionfs_lower_file_idx(file, bindex); ++ if (!lower_file) ++ continue; ++ ++ /* ++ * Find new index of matching branch with an open ++ * file, since branches could have been added or ++ * deleted causing the one with open files to shift. ++ */ ++ old_bid = UNIONFS_F(file)->saved_branch_ids[bindex]; ++ i = branch_id_to_idx(sb, old_bid); ++ if (unlikely(i < 0)) { ++ printk(KERN_ERR "unionfs: no superblock for " ++ "file %p\n", file); ++ continue; ++ } ++ ++ /* decrement count of open files */ ++ branchput(sb, i); ++ /* ++ * fput will perform an mntput for us on the correct branch. ++ * Although we're using the file's old branch configuration, ++ * bindex, which is the old index, correctly points to the ++ * right branch in the file's branch list. In other words, ++ * we're going to mntput the correct branch even if branches ++ * have been added/removed. ++ */ ++ fput(lower_file); ++ UNIONFS_F(file)->lower_files[bindex] = NULL; ++ UNIONFS_F(file)->saved_branch_ids[bindex] = -1; ++ } ++ ++ UNIONFS_F(file)->lower_files = NULL; ++ kfree(lower_files); ++ kfree(UNIONFS_F(file)->saved_branch_ids); ++ /* set to NULL because caller needs to know if to kfree on error */ ++ UNIONFS_F(file)->saved_branch_ids = NULL; ++} ++ ++/* open all lower files for a given file */ ++static int open_all_files(struct file *file) ++{ ++ int bindex, bstart, bend, err = 0; ++ struct file *lower_file; ++ struct dentry *lower_dentry; ++ struct dentry *dentry = file->f_path.dentry; ++ struct super_block *sb = dentry->d_sb; ++ ++ bstart = dbstart(dentry); ++ bend = dbend(dentry); ++ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ if (!lower_dentry) ++ continue; ++ ++ dget(lower_dentry); ++ unionfs_mntget(dentry, bindex); ++ branchget(sb, bindex); ++ ++ lower_file = ++ dentry_open(lower_dentry, ++ unionfs_lower_mnt_idx(dentry, bindex), ++ file->f_flags, current_cred()); ++ if (IS_ERR(lower_file)) { ++ branchput(sb, bindex); ++ err = PTR_ERR(lower_file); ++ goto out; ++ } else { ++ unionfs_set_lower_file_idx(file, bindex, lower_file); ++ } ++ } ++out: ++ return err; ++} ++ ++/* open the highest priority file for a given upper file */ ++static int open_highest_file(struct file *file, bool willwrite) ++{ ++ int bindex, bstart, bend, err = 0; ++ struct file *lower_file; ++ struct dentry *lower_dentry; ++ struct dentry *dentry = file->f_path.dentry; ++ struct dentry *parent = dget_parent(dentry); ++ struct inode *parent_inode = parent->d_inode; ++ struct super_block *sb = dentry->d_sb; ++ ++ bstart = dbstart(dentry); ++ bend = dbend(dentry); ++ ++ lower_dentry = unionfs_lower_dentry(dentry); ++ if (willwrite && IS_WRITE_FLAG(file->f_flags) && is_robranch(dentry)) { ++ for (bindex = bstart - 1; bindex >= 0; bindex--) { ++ err = copyup_file(parent_inode, file, bstart, bindex, ++ i_size_read(dentry->d_inode)); ++ if (!err) ++ break; ++ } ++ atomic_set(&UNIONFS_F(file)->generation, ++ atomic_read(&UNIONFS_I(dentry->d_inode)-> ++ generation)); ++ goto out; ++ } ++ ++ dget(lower_dentry); ++ unionfs_mntget(dentry, bstart); ++ lower_file = dentry_open(lower_dentry, ++ unionfs_lower_mnt_idx(dentry, bstart), ++ file->f_flags, current_cred()); ++ if (IS_ERR(lower_file)) { ++ err = PTR_ERR(lower_file); ++ goto out; ++ } ++ branchget(sb, bstart); ++ unionfs_set_lower_file(file, lower_file); ++ /* Fix up the position. */ ++ lower_file->f_pos = file->f_pos; ++ ++ memcpy(&lower_file->f_ra, &file->f_ra, sizeof(struct file_ra_state)); ++out: ++ dput(parent); ++ return err; ++} ++ ++/* perform a delayed copyup of a read-write file on a read-only branch */ ++static int do_delayed_copyup(struct file *file, struct dentry *parent) ++{ ++ int bindex, bstart, bend, err = 0; ++ struct dentry *dentry = file->f_path.dentry; ++ struct inode *parent_inode = parent->d_inode; ++ ++ bstart = fbstart(file); ++ bend = fbend(file); ++ ++ BUG_ON(!S_ISREG(dentry->d_inode->i_mode)); ++ ++ unionfs_check_file(file); ++ for (bindex = bstart - 1; bindex >= 0; bindex--) { ++ if (!d_deleted(dentry)) ++ err = copyup_file(parent_inode, file, bstart, ++ bindex, ++ i_size_read(dentry->d_inode)); ++ else ++ err = copyup_deleted_file(file, dentry, parent, ++ bstart, bindex); ++ /* if succeeded, set lower open-file flags and break */ ++ if (!err) { ++ struct file *lower_file; ++ lower_file = unionfs_lower_file_idx(file, bindex); ++ lower_file->f_flags = file->f_flags; ++ break; ++ } ++ } ++ if (err || (bstart <= fbstart(file))) ++ goto out; ++ bend = fbend(file); ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ if (unionfs_lower_file_idx(file, bindex)) { ++ branchput(dentry->d_sb, bindex); ++ fput(unionfs_lower_file_idx(file, bindex)); ++ unionfs_set_lower_file_idx(file, bindex, NULL); ++ } ++ } ++ path_put_lowers(dentry, bstart, bend, false); ++ iput_lowers(dentry->d_inode, bstart, bend, false); ++ /* for reg file, we only open it "once" */ ++ fbend(file) = fbstart(file); ++ dbend(dentry) = dbstart(dentry); ++ ibend(dentry->d_inode) = ibstart(dentry->d_inode); ++ ++out: ++ unionfs_check_file(file); ++ return err; ++} ++ ++/* ++ * Helper function for unionfs_file_revalidate/locked. ++ * Expects dentry/parent to be locked already, and revalidated. ++ */ ++static int __unionfs_file_revalidate(struct file *file, struct dentry *dentry, ++ struct dentry *parent, ++ struct super_block *sb, int sbgen, ++ int dgen, bool willwrite) ++{ ++ int fgen; ++ int bstart, bend, orig_brid; ++ int size; ++ int err = 0; ++ ++ fgen = atomic_read(&UNIONFS_F(file)->generation); ++ ++ /* ++ * There are two cases we are interested in. The first is if the ++ * generation is lower than the super-block. The second is if ++ * someone has copied up this file from underneath us, we also need ++ * to refresh things. ++ */ ++ if (d_deleted(dentry) || ++ (sbgen <= fgen && ++ dbstart(dentry) == fbstart(file) && ++ unionfs_lower_file(file))) ++ goto out_may_copyup; ++ ++ /* save orig branch ID */ ++ orig_brid = UNIONFS_F(file)->saved_branch_ids[fbstart(file)]; ++ ++ /* First we throw out the existing files. */ ++ cleanup_file(file); ++ ++ /* Now we reopen the file(s) as in unionfs_open. */ ++ bstart = fbstart(file) = dbstart(dentry); ++ bend = fbend(file) = dbend(dentry); ++ ++ size = sizeof(struct file *) * sbmax(sb); ++ UNIONFS_F(file)->lower_files = kzalloc(size, GFP_KERNEL); ++ if (unlikely(!UNIONFS_F(file)->lower_files)) { ++ err = -ENOMEM; ++ goto out; ++ } ++ size = sizeof(int) * sbmax(sb); ++ UNIONFS_F(file)->saved_branch_ids = kzalloc(size, GFP_KERNEL); ++ if (unlikely(!UNIONFS_F(file)->saved_branch_ids)) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ if (S_ISDIR(dentry->d_inode->i_mode)) { ++ /* We need to open all the files. */ ++ err = open_all_files(file); ++ if (err) ++ goto out; ++ } else { ++ int new_brid; ++ /* We only open the highest priority branch. */ ++ err = open_highest_file(file, willwrite); ++ if (err) ++ goto out; ++ new_brid = UNIONFS_F(file)->saved_branch_ids[fbstart(file)]; ++ if (unlikely(new_brid != orig_brid && sbgen > fgen)) { ++ /* ++ * If we re-opened the file on a different branch ++ * than the original one, and this was due to a new ++ * branch inserted, then update the mnt counts of ++ * the old and new branches accordingly. ++ */ ++ unionfs_mntget(dentry, bstart); ++ unionfs_mntput(sb->s_root, ++ branch_id_to_idx(sb, orig_brid)); ++ } ++ /* regular files have only one open lower file */ ++ fbend(file) = fbstart(file); ++ } ++ atomic_set(&UNIONFS_F(file)->generation, ++ atomic_read(&UNIONFS_I(dentry->d_inode)->generation)); ++ ++out_may_copyup: ++ /* Copyup on the first write to a file on a readonly branch. */ ++ if (willwrite && IS_WRITE_FLAG(file->f_flags) && ++ !IS_WRITE_FLAG(unionfs_lower_file(file)->f_flags) && ++ is_robranch(dentry)) { ++ pr_debug("unionfs: do delay copyup of \"%s\"\n", ++ dentry->d_name.name); ++ err = do_delayed_copyup(file, parent); ++ /* regular files have only one open lower file */ ++ if (!err && !S_ISDIR(dentry->d_inode->i_mode)) ++ fbend(file) = fbstart(file); ++ } ++ ++out: ++ if (err) { ++ kfree(UNIONFS_F(file)->lower_files); ++ kfree(UNIONFS_F(file)->saved_branch_ids); ++ } ++ return err; ++} ++ ++/* ++ * Revalidate the struct file ++ * @file: file to revalidate ++ * @parent: parent dentry (locked by caller) ++ * @willwrite: true if caller may cause changes to the file; false otherwise. ++ * Caller must lock/unlock dentry's branch configuration. ++ */ ++int unionfs_file_revalidate(struct file *file, struct dentry *parent, ++ bool willwrite) ++{ ++ struct super_block *sb; ++ struct dentry *dentry; ++ int sbgen, dgen; ++ int err = 0; ++ ++ dentry = file->f_path.dentry; ++ sb = dentry->d_sb; ++ verify_locked(dentry); ++ verify_locked(parent); ++ ++ /* ++ * First revalidate the dentry inside struct file, ++ * but not unhashed dentries. ++ */ ++ if (!d_deleted(dentry) && ++ !__unionfs_d_revalidate(dentry, parent, willwrite)) { ++ err = -ESTALE; ++ goto out; ++ } ++ ++ sbgen = atomic_read(&UNIONFS_SB(sb)->generation); ++ dgen = atomic_read(&UNIONFS_D(dentry)->generation); ++ ++ if (unlikely(sbgen > dgen)) { /* XXX: should never happen */ ++ pr_debug("unionfs: failed to revalidate dentry (%s)\n", ++ dentry->d_name.name); ++ err = -ESTALE; ++ goto out; ++ } ++ ++ err = __unionfs_file_revalidate(file, dentry, parent, sb, ++ sbgen, dgen, willwrite); ++out: ++ return err; ++} ++ ++/* unionfs_open helper function: open a directory */ ++static int __open_dir(struct inode *inode, struct file *file) ++{ ++ struct dentry *lower_dentry; ++ struct file *lower_file; ++ int bindex, bstart, bend; ++ struct vfsmount *mnt; ++ ++ bstart = fbstart(file) = dbstart(file->f_path.dentry); ++ bend = fbend(file) = dbend(file->f_path.dentry); ++ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ lower_dentry = ++ unionfs_lower_dentry_idx(file->f_path.dentry, bindex); ++ if (!lower_dentry) ++ continue; ++ ++ dget(lower_dentry); ++ unionfs_mntget(file->f_path.dentry, bindex); ++ mnt = unionfs_lower_mnt_idx(file->f_path.dentry, bindex); ++ lower_file = dentry_open(lower_dentry, mnt, file->f_flags, ++ current_cred()); ++ if (IS_ERR(lower_file)) ++ return PTR_ERR(lower_file); ++ ++ unionfs_set_lower_file_idx(file, bindex, lower_file); ++ ++ /* ++ * The branchget goes after the open, because otherwise ++ * we would miss the reference on release. ++ */ ++ branchget(inode->i_sb, bindex); ++ } ++ ++ return 0; ++} ++ ++/* unionfs_open helper function: open a file */ ++static int __open_file(struct inode *inode, struct file *file, ++ struct dentry *parent) ++{ ++ struct dentry *lower_dentry; ++ struct file *lower_file; ++ int lower_flags; ++ int bindex, bstart, bend; ++ ++ lower_dentry = unionfs_lower_dentry(file->f_path.dentry); ++ lower_flags = file->f_flags; ++ ++ bstart = fbstart(file) = dbstart(file->f_path.dentry); ++ bend = fbend(file) = dbend(file->f_path.dentry); ++ ++ /* ++ * check for the permission for lower file. If the error is ++ * COPYUP_ERR, copyup the file. ++ */ ++ if (lower_dentry->d_inode && is_robranch(file->f_path.dentry)) { ++ /* ++ * if the open will change the file, copy it up otherwise ++ * defer it. ++ */ ++ if (lower_flags & O_TRUNC) { ++ int size = 0; ++ int err = -EROFS; ++ ++ /* copyup the file */ ++ for (bindex = bstart - 1; bindex >= 0; bindex--) { ++ err = copyup_file(parent->d_inode, file, ++ bstart, bindex, size); ++ if (!err) ++ break; ++ } ++ return err; ++ } else { ++ /* ++ * turn off writeable flags, to force delayed copyup ++ * by caller. ++ */ ++ lower_flags &= ~(OPEN_WRITE_FLAGS); ++ } ++ } ++ ++ dget(lower_dentry); ++ ++ /* ++ * dentry_open will decrement mnt refcnt if err. ++ * otherwise fput() will do an mntput() for us upon file close. ++ */ ++ unionfs_mntget(file->f_path.dentry, bstart); ++ lower_file = ++ dentry_open(lower_dentry, ++ unionfs_lower_mnt_idx(file->f_path.dentry, bstart), ++ lower_flags, current_cred()); ++ if (IS_ERR(lower_file)) ++ return PTR_ERR(lower_file); ++ ++ unionfs_set_lower_file(file, lower_file); ++ branchget(inode->i_sb, bstart); ++ ++ return 0; ++} ++ ++int unionfs_open(struct inode *inode, struct file *file) ++{ ++ int err = 0; ++ struct file *lower_file = NULL; ++ struct dentry *dentry = file->f_path.dentry; ++ struct dentry *parent; ++ int bindex = 0, bstart = 0, bend = 0; ++ int size; ++ int valid = 0; ++ ++ unionfs_read_lock(inode->i_sb, UNIONFS_SMUTEX_PARENT); ++ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ /* don't open unhashed/deleted files */ ++ if (d_deleted(dentry)) { ++ err = -ENOENT; ++ goto out_nofree; ++ } ++ ++ /* XXX: should I change 'false' below to the 'willwrite' flag? */ ++ valid = __unionfs_d_revalidate(dentry, parent, false); ++ if (unlikely(!valid)) { ++ err = -ESTALE; ++ goto out_nofree; ++ } ++ ++ file->private_data = ++ kzalloc(sizeof(struct unionfs_file_info), GFP_KERNEL); ++ if (unlikely(!UNIONFS_F(file))) { ++ err = -ENOMEM; ++ goto out_nofree; ++ } ++ fbstart(file) = -1; ++ fbend(file) = -1; ++ atomic_set(&UNIONFS_F(file)->generation, ++ atomic_read(&UNIONFS_I(inode)->generation)); ++ ++ size = sizeof(struct file *) * sbmax(inode->i_sb); ++ UNIONFS_F(file)->lower_files = kzalloc(size, GFP_KERNEL); ++ if (unlikely(!UNIONFS_F(file)->lower_files)) { ++ err = -ENOMEM; ++ goto out; ++ } ++ size = sizeof(int) * sbmax(inode->i_sb); ++ UNIONFS_F(file)->saved_branch_ids = kzalloc(size, GFP_KERNEL); ++ if (unlikely(!UNIONFS_F(file)->saved_branch_ids)) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ bstart = fbstart(file) = dbstart(dentry); ++ bend = fbend(file) = dbend(dentry); ++ ++ /* ++ * open all directories and make the unionfs file struct point to ++ * these lower file structs ++ */ ++ if (S_ISDIR(inode->i_mode)) ++ err = __open_dir(inode, file); /* open a dir */ ++ else ++ err = __open_file(inode, file, parent); /* open a file */ ++ ++ /* freeing the allocated resources, and fput the opened files */ ++ if (err) { ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ lower_file = unionfs_lower_file_idx(file, bindex); ++ if (!lower_file) ++ continue; ++ ++ branchput(dentry->d_sb, bindex); ++ /* fput calls dput for lower_dentry */ ++ fput(lower_file); ++ } ++ } ++ ++out: ++ if (err) { ++ kfree(UNIONFS_F(file)->lower_files); ++ kfree(UNIONFS_F(file)->saved_branch_ids); ++ kfree(UNIONFS_F(file)); ++ } ++out_nofree: ++ if (!err) { ++ unionfs_postcopyup_setmnt(dentry); ++ unionfs_copy_attr_times(inode); ++ unionfs_check_file(file); ++ unionfs_check_inode(inode); ++ } ++ unionfs_unlock_dentry(dentry); ++ unionfs_unlock_parent(dentry, parent); ++ unionfs_read_unlock(inode->i_sb); ++ return err; ++} ++ ++/* ++ * release all lower object references & free the file info structure ++ * ++ * No need to grab sb info's rwsem. ++ */ ++int unionfs_file_release(struct inode *inode, struct file *file) ++{ ++ struct file *lower_file = NULL; ++ struct unionfs_file_info *fileinfo; ++ struct unionfs_inode_info *inodeinfo; ++ struct super_block *sb = inode->i_sb; ++ struct dentry *dentry = file->f_path.dentry; ++ struct dentry *parent; ++ int bindex, bstart, bend; ++ int fgen, err = 0; ++ ++ /* ++ * Since mm/memory.c:might_fault() (under PROVE_LOCKING) was ++ * modified in 2.6.29-rc1 to call might_lock_read on mmap_sem, this ++ * has been causing false positives in file system stacking layers. ++ * In particular, our ->mmap is called after sys_mmap2 already holds ++ * mmap_sem, then we lock our own mutexes; but earlier, it's ++ * possible for lockdep to have locked our mutexes first, and then ++ * we call a lower ->readdir which could call might_fault. The ++ * different ordering of the locks is what lockdep complains about ++ * -- unnecessarily. Therefore, we have no choice but to tell ++ * lockdep to temporarily turn off lockdep here. Note: the comments ++ * inside might_sleep also suggest that it would have been ++ * nicer to only annotate paths that needs that might_lock_read. ++ */ ++ lockdep_off(); ++ unionfs_read_lock(sb, UNIONFS_SMUTEX_PARENT); ++ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ /* ++ * We try to revalidate, but the VFS ignores return return values ++ * from file->release, so we must always try to succeed here, ++ * including to do the kfree and dput below. So if revalidation ++ * failed, all we can do is print some message and keep going. ++ */ ++ err = unionfs_file_revalidate(file, parent, ++ UNIONFS_F(file)->wrote_to_file); ++ if (!err) ++ unionfs_check_file(file); ++ fileinfo = UNIONFS_F(file); ++ BUG_ON(file->f_path.dentry->d_inode != inode); ++ inodeinfo = UNIONFS_I(inode); ++ ++ /* fput all the lower files */ ++ fgen = atomic_read(&fileinfo->generation); ++ bstart = fbstart(file); ++ bend = fbend(file); ++ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ lower_file = unionfs_lower_file_idx(file, bindex); ++ ++ if (lower_file) { ++ unionfs_set_lower_file_idx(file, bindex, NULL); ++ fput(lower_file); ++ branchput(sb, bindex); ++ } ++ ++ /* if there are no more refs to the dentry, dput it */ ++ if (d_deleted(dentry)) { ++ dput(unionfs_lower_dentry_idx(dentry, bindex)); ++ unionfs_set_lower_dentry_idx(dentry, bindex, NULL); ++ } ++ } ++ ++ kfree(fileinfo->lower_files); ++ kfree(fileinfo->saved_branch_ids); ++ ++ if (fileinfo->rdstate) { ++ fileinfo->rdstate->access = jiffies; ++ spin_lock(&inodeinfo->rdlock); ++ inodeinfo->rdcount++; ++ list_add_tail(&fileinfo->rdstate->cache, ++ &inodeinfo->readdircache); ++ mark_inode_dirty(inode); ++ spin_unlock(&inodeinfo->rdlock); ++ fileinfo->rdstate = NULL; ++ } ++ kfree(fileinfo); ++ ++ unionfs_unlock_dentry(dentry); ++ unionfs_unlock_parent(dentry, parent); ++ unionfs_read_unlock(sb); ++ lockdep_on(); ++ return err; ++} ++ ++/* pass the ioctl to the lower fs */ ++static long do_ioctl(struct file *file, unsigned int cmd, unsigned long arg) ++{ ++ struct file *lower_file; ++ int err; ++ ++ lower_file = unionfs_lower_file(file); ++ ++ err = -ENOTTY; ++ if (!lower_file || !lower_file->f_op) ++ goto out; ++ if (lower_file->f_op->unlocked_ioctl) { ++ err = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg); ++#ifdef CONFIG_COMPAT ++ } else if (lower_file->f_op->ioctl) { ++ err = lower_file->f_op->compat_ioctl( ++ lower_file->f_path.dentry->d_inode, ++ lower_file, cmd, arg); ++#endif ++ } ++ ++out: ++ return err; ++} ++ ++/* ++ * return to user-space the branch indices containing the file in question ++ * ++ * We use fd_set and therefore we are limited to the number of the branches ++ * to FD_SETSIZE, which is currently 1024 - plenty for most people ++ */ ++static int unionfs_ioctl_queryfile(struct file *file, struct dentry *parent, ++ unsigned int cmd, unsigned long arg) ++{ ++ int err = 0; ++ fd_set branchlist; ++ int bstart = 0, bend = 0, bindex = 0; ++ int orig_bstart, orig_bend; ++ struct dentry *dentry, *lower_dentry; ++ struct vfsmount *mnt; ++ ++ dentry = file->f_path.dentry; ++ orig_bstart = dbstart(dentry); ++ orig_bend = dbend(dentry); ++ err = unionfs_partial_lookup(dentry, parent); ++ if (err) ++ goto out; ++ bstart = dbstart(dentry); ++ bend = dbend(dentry); ++ ++ FD_ZERO(&branchlist); ++ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ if (!lower_dentry) ++ continue; ++ if (likely(lower_dentry->d_inode)) ++ FD_SET(bindex, &branchlist); ++ /* purge any lower objects after partial_lookup */ ++ if (bindex < orig_bstart || bindex > orig_bend) { ++ dput(lower_dentry); ++ unionfs_set_lower_dentry_idx(dentry, bindex, NULL); ++ iput(unionfs_lower_inode_idx(dentry->d_inode, bindex)); ++ unionfs_set_lower_inode_idx(dentry->d_inode, bindex, ++ NULL); ++ mnt = unionfs_lower_mnt_idx(dentry, bindex); ++ if (!mnt) ++ continue; ++ unionfs_mntput(dentry, bindex); ++ unionfs_set_lower_mnt_idx(dentry, bindex, NULL); ++ } ++ } ++ /* restore original dentry's offsets */ ++ dbstart(dentry) = orig_bstart; ++ dbend(dentry) = orig_bend; ++ ibstart(dentry->d_inode) = orig_bstart; ++ ibend(dentry->d_inode) = orig_bend; ++ ++ err = copy_to_user((void __user *)arg, &branchlist, sizeof(fd_set)); ++ if (unlikely(err)) ++ err = -EFAULT; ++ ++out: ++ return err < 0 ? err : bend; ++} ++ ++long unionfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) ++{ ++ long err; ++ struct dentry *dentry = file->f_path.dentry; ++ struct dentry *parent; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT); ++ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ err = unionfs_file_revalidate(file, parent, true); ++ if (unlikely(err)) ++ goto out; ++ ++ /* check if asked for local commands */ ++ switch (cmd) { ++ case UNIONFS_IOCTL_INCGEN: ++ /* Increment the superblock generation count */ ++ pr_info("unionfs: incgen ioctl deprecated; " ++ "use \"-o remount,incgen\"\n"); ++ err = -ENOSYS; ++ break; ++ ++ case UNIONFS_IOCTL_QUERYFILE: ++ /* Return list of branches containing the given file */ ++ err = unionfs_ioctl_queryfile(file, parent, cmd, arg); ++ break; ++ ++ default: ++ /* pass the ioctl down */ ++ err = do_ioctl(file, cmd, arg); ++ break; ++ } ++ ++out: ++ unionfs_check_file(file); ++ unionfs_unlock_dentry(dentry); ++ unionfs_unlock_parent(dentry, parent); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} ++ ++int unionfs_flush(struct file *file, fl_owner_t id) ++{ ++ int err = 0; ++ struct file *lower_file = NULL; ++ struct dentry *dentry = file->f_path.dentry; ++ struct dentry *parent; ++ int bindex, bstart, bend; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT); ++ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ err = unionfs_file_revalidate(file, parent, ++ UNIONFS_F(file)->wrote_to_file); ++ if (unlikely(err)) ++ goto out; ++ unionfs_check_file(file); ++ ++ bstart = fbstart(file); ++ bend = fbend(file); ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ lower_file = unionfs_lower_file_idx(file, bindex); ++ ++ if (lower_file && lower_file->f_op && ++ lower_file->f_op->flush) { ++ err = lower_file->f_op->flush(lower_file, id); ++ if (err) ++ goto out; ++ } ++ ++ } ++ ++out: ++ if (!err) ++ unionfs_check_file(file); ++ unionfs_unlock_dentry(dentry); ++ unionfs_unlock_parent(dentry, parent); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} +diff --git a/fs/unionfs/copyup.c b/fs/unionfs/copyup.c +new file mode 100644 +index 0000000..bba3a75 +--- /dev/null ++++ b/fs/unionfs/copyup.c +@@ -0,0 +1,896 @@ ++/* ++ * Copyright (c) 2003-2010 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2010 Stony Brook University ++ * Copyright (c) 2003-2010 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++ ++/* ++ * For detailed explanation of copyup see: ++ * Documentation/filesystems/unionfs/concepts.txt ++ */ ++ ++#ifdef CONFIG_UNION_FS_XATTR ++/* copyup all extended attrs for a given dentry */ ++static int copyup_xattrs(struct dentry *old_lower_dentry, ++ struct dentry *new_lower_dentry) ++{ ++ int err = 0; ++ ssize_t list_size = -1; ++ char *name_list = NULL; ++ char *attr_value = NULL; ++ char *name_list_buf = NULL; ++ ++ /* query the actual size of the xattr list */ ++ list_size = vfs_listxattr(old_lower_dentry, NULL, 0); ++ if (list_size <= 0) { ++ err = list_size; ++ goto out; ++ } ++ ++ /* allocate space for the actual list */ ++ name_list = unionfs_xattr_alloc(list_size + 1, XATTR_LIST_MAX); ++ if (unlikely(!name_list || IS_ERR(name_list))) { ++ err = PTR_ERR(name_list); ++ goto out; ++ } ++ ++ name_list_buf = name_list; /* save for kfree at end */ ++ ++ /* now get the actual xattr list of the source file */ ++ list_size = vfs_listxattr(old_lower_dentry, name_list, list_size); ++ if (list_size <= 0) { ++ err = list_size; ++ goto out; ++ } ++ ++ /* allocate space to hold each xattr's value */ ++ attr_value = unionfs_xattr_alloc(XATTR_SIZE_MAX, XATTR_SIZE_MAX); ++ if (unlikely(!attr_value || IS_ERR(attr_value))) { ++ err = PTR_ERR(name_list); ++ goto out; ++ } ++ ++ /* in a loop, get and set each xattr from src to dst file */ ++ while (*name_list) { ++ ssize_t size; ++ ++ /* Lock here since vfs_getxattr doesn't lock for us */ ++ mutex_lock(&old_lower_dentry->d_inode->i_mutex); ++ size = vfs_getxattr(old_lower_dentry, name_list, ++ attr_value, XATTR_SIZE_MAX); ++ mutex_unlock(&old_lower_dentry->d_inode->i_mutex); ++ if (size < 0) { ++ err = size; ++ goto out; ++ } ++ if (size > XATTR_SIZE_MAX) { ++ err = -E2BIG; ++ goto out; ++ } ++ /* Don't lock here since vfs_setxattr does it for us. */ ++ err = vfs_setxattr(new_lower_dentry, name_list, attr_value, ++ size, 0); ++ /* ++ * Selinux depends on "security.*" xattrs, so to maintain ++ * the security of copied-up files, if Selinux is active, ++ * then we must copy these xattrs as well. So we need to ++ * temporarily get FOWNER privileges. ++ * XXX: move entire copyup code to SIOQ. ++ */ ++ if (err == -EPERM && !capable(CAP_FOWNER)) { ++ const struct cred *old_creds; ++ struct cred *new_creds; ++ ++ new_creds = prepare_creds(); ++ if (unlikely(!new_creds)) { ++ err = -ENOMEM; ++ goto out; ++ } ++ cap_raise(new_creds->cap_effective, CAP_FOWNER); ++ old_creds = override_creds(new_creds); ++ err = vfs_setxattr(new_lower_dentry, name_list, ++ attr_value, size, 0); ++ revert_creds(old_creds); ++ } ++ if (err < 0) ++ goto out; ++ name_list += strlen(name_list) + 1; ++ } ++out: ++ unionfs_xattr_kfree(name_list_buf); ++ unionfs_xattr_kfree(attr_value); ++ /* Ignore if xattr isn't supported */ ++ if (err == -ENOTSUPP || err == -EOPNOTSUPP) ++ err = 0; ++ return err; ++} ++#endif /* CONFIG_UNION_FS_XATTR */ ++ ++/* ++ * Determine the mode based on the copyup flags, and the existing dentry. ++ * ++ * Handle file systems which may not support certain options. For example ++ * jffs2 doesn't allow one to chmod a symlink. So we ignore such harmless ++ * errors, rather than propagating them up, which results in copyup errors ++ * and errors returned back to users. ++ */ ++static int copyup_permissions(struct super_block *sb, ++ struct dentry *old_lower_dentry, ++ struct dentry *new_lower_dentry) ++{ ++ struct inode *i = old_lower_dentry->d_inode; ++ struct iattr newattrs; ++ int err; ++ ++ newattrs.ia_atime = i->i_atime; ++ newattrs.ia_mtime = i->i_mtime; ++ newattrs.ia_ctime = i->i_ctime; ++ newattrs.ia_gid = i->i_gid; ++ newattrs.ia_uid = i->i_uid; ++ newattrs.ia_valid = ATTR_CTIME | ATTR_ATIME | ATTR_MTIME | ++ ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_FORCE | ++ ATTR_GID | ATTR_UID; ++ mutex_lock(&new_lower_dentry->d_inode->i_mutex); ++ err = notify_change(new_lower_dentry, &newattrs); ++ if (err) ++ goto out; ++ ++ /* now try to change the mode and ignore EOPNOTSUPP on symlinks */ ++ newattrs.ia_mode = i->i_mode; ++ newattrs.ia_valid = ATTR_MODE | ATTR_FORCE; ++ err = notify_change(new_lower_dentry, &newattrs); ++ if (err == -EOPNOTSUPP && ++ S_ISLNK(new_lower_dentry->d_inode->i_mode)) { ++ printk(KERN_WARNING ++ "unionfs: changing \"%s\" symlink mode unsupported\n", ++ new_lower_dentry->d_name.name); ++ err = 0; ++ } ++ ++out: ++ mutex_unlock(&new_lower_dentry->d_inode->i_mutex); ++ return err; ++} ++ ++/* ++ * create the new device/file/directory - use copyup_permission to copyup ++ * times, and mode ++ * ++ * if the object being copied up is a regular file, the file is only created, ++ * the contents have to be copied up separately ++ */ ++static int __copyup_ndentry(struct dentry *old_lower_dentry, ++ struct dentry *new_lower_dentry, ++ struct dentry *new_lower_parent_dentry, ++ char *symbuf) ++{ ++ int err = 0; ++ umode_t old_mode = old_lower_dentry->d_inode->i_mode; ++ struct sioq_args args; ++ ++ if (S_ISDIR(old_mode)) { ++ args.mkdir.parent = new_lower_parent_dentry->d_inode; ++ args.mkdir.dentry = new_lower_dentry; ++ args.mkdir.mode = old_mode; ++ ++ run_sioq(__unionfs_mkdir, &args); ++ err = args.err; ++ } else if (S_ISLNK(old_mode)) { ++ args.symlink.parent = new_lower_parent_dentry->d_inode; ++ args.symlink.dentry = new_lower_dentry; ++ args.symlink.symbuf = symbuf; ++ ++ run_sioq(__unionfs_symlink, &args); ++ err = args.err; ++ } else if (S_ISBLK(old_mode) || S_ISCHR(old_mode) || ++ S_ISFIFO(old_mode) || S_ISSOCK(old_mode)) { ++ args.mknod.parent = new_lower_parent_dentry->d_inode; ++ args.mknod.dentry = new_lower_dentry; ++ args.mknod.mode = old_mode; ++ args.mknod.dev = old_lower_dentry->d_inode->i_rdev; ++ ++ run_sioq(__unionfs_mknod, &args); ++ err = args.err; ++ } else if (S_ISREG(old_mode)) { ++ struct nameidata nd; ++ err = init_lower_nd(&nd, LOOKUP_CREATE); ++ if (unlikely(err < 0)) ++ goto out; ++ args.create.nd = &nd; ++ args.create.parent = new_lower_parent_dentry->d_inode; ++ args.create.dentry = new_lower_dentry; ++ args.create.mode = old_mode; ++ ++ run_sioq(__unionfs_create, &args); ++ err = args.err; ++ release_lower_nd(&nd, err); ++ } else { ++ printk(KERN_CRIT "unionfs: unknown inode type %d\n", ++ old_mode); ++ BUG(); ++ } ++ ++out: ++ return err; ++} ++ ++static int __copyup_reg_data(struct dentry *dentry, ++ struct dentry *new_lower_dentry, int new_bindex, ++ struct dentry *old_lower_dentry, int old_bindex, ++ struct file **copyup_file, loff_t len) ++{ ++ struct super_block *sb = dentry->d_sb; ++ struct file *input_file; ++ struct file *output_file; ++ struct vfsmount *output_mnt; ++ mm_segment_t old_fs; ++ char *buf = NULL; ++ ssize_t read_bytes, write_bytes; ++ loff_t size; ++ int err = 0; ++ ++ /* open old file */ ++ unionfs_mntget(dentry, old_bindex); ++ branchget(sb, old_bindex); ++ /* dentry_open calls dput and mntput if it returns an error */ ++ input_file = dentry_open(old_lower_dentry, ++ unionfs_lower_mnt_idx(dentry, old_bindex), ++ O_RDONLY | O_LARGEFILE, current_cred()); ++ if (IS_ERR(input_file)) { ++ dput(old_lower_dentry); ++ err = PTR_ERR(input_file); ++ goto out; ++ } ++ if (unlikely(!input_file->f_op || !input_file->f_op->read)) { ++ err = -EINVAL; ++ goto out_close_in; ++ } ++ ++ /* open new file */ ++ dget(new_lower_dentry); ++ output_mnt = unionfs_mntget(sb->s_root, new_bindex); ++ branchget(sb, new_bindex); ++ output_file = dentry_open(new_lower_dentry, output_mnt, ++ O_RDWR | O_LARGEFILE, current_cred()); ++ if (IS_ERR(output_file)) { ++ err = PTR_ERR(output_file); ++ goto out_close_in2; ++ } ++ if (unlikely(!output_file->f_op || !output_file->f_op->write)) { ++ err = -EINVAL; ++ goto out_close_out; ++ } ++ ++ /* allocating a buffer */ ++ buf = kmalloc(PAGE_SIZE, GFP_KERNEL); ++ if (unlikely(!buf)) { ++ err = -ENOMEM; ++ goto out_close_out; ++ } ++ ++ input_file->f_pos = 0; ++ output_file->f_pos = 0; ++ ++ old_fs = get_fs(); ++ set_fs(KERNEL_DS); ++ ++ size = len; ++ err = 0; ++ do { ++ if (len >= PAGE_SIZE) ++ size = PAGE_SIZE; ++ else if ((len < PAGE_SIZE) && (len > 0)) ++ size = len; ++ ++ len -= PAGE_SIZE; ++ ++ read_bytes = ++ input_file->f_op->read(input_file, ++ (char __user *)buf, size, ++ &input_file->f_pos); ++ if (read_bytes <= 0) { ++ err = read_bytes; ++ break; ++ } ++ ++ /* see Documentation/filesystems/unionfs/issues.txt */ ++ lockdep_off(); ++ write_bytes = ++ output_file->f_op->write(output_file, ++ (char __user *)buf, ++ read_bytes, ++ &output_file->f_pos); ++ lockdep_on(); ++ if ((write_bytes < 0) || (write_bytes < read_bytes)) { ++ err = write_bytes; ++ break; ++ } ++ } while ((read_bytes > 0) && (len > 0)); ++ ++ set_fs(old_fs); ++ ++ kfree(buf); ++ ++ if (!err) ++ err = output_file->f_op->fsync(output_file, 0); ++ ++ if (err) ++ goto out_close_out; ++ ++ if (copyup_file) { ++ *copyup_file = output_file; ++ goto out_close_in; ++ } ++ ++out_close_out: ++ fput(output_file); ++ ++out_close_in2: ++ branchput(sb, new_bindex); ++ ++out_close_in: ++ fput(input_file); ++ ++out: ++ branchput(sb, old_bindex); ++ ++ return err; ++} ++ ++/* ++ * dput the lower references for old and new dentry & clear a lower dentry ++ * pointer ++ */ ++static void __clear(struct dentry *dentry, struct dentry *old_lower_dentry, ++ int old_bstart, int old_bend, ++ struct dentry *new_lower_dentry, int new_bindex) ++{ ++ /* get rid of the lower dentry and all its traces */ ++ unionfs_set_lower_dentry_idx(dentry, new_bindex, NULL); ++ dbstart(dentry) = old_bstart; ++ dbend(dentry) = old_bend; ++ ++ dput(new_lower_dentry); ++ dput(old_lower_dentry); ++} ++ ++/* ++ * Copy up a dentry to a file of specified name. ++ * ++ * @dir: used to pull the ->i_sb to access other branches ++ * @dentry: the non-negative dentry whose lower_inode we should copy ++ * @bstart: the branch of the lower_inode to copy from ++ * @new_bindex: the branch to create the new file in ++ * @name: the name of the file to create ++ * @namelen: length of @name ++ * @copyup_file: the "struct file" to return (optional) ++ * @len: how many bytes to copy-up? ++ */ ++int copyup_dentry(struct inode *dir, struct dentry *dentry, int bstart, ++ int new_bindex, const char *name, int namelen, ++ struct file **copyup_file, loff_t len) ++{ ++ struct dentry *new_lower_dentry; ++ struct dentry *old_lower_dentry = NULL; ++ struct super_block *sb; ++ int err = 0; ++ int old_bindex; ++ int old_bstart; ++ int old_bend; ++ struct dentry *new_lower_parent_dentry = NULL; ++ mm_segment_t oldfs; ++ char *symbuf = NULL; ++ ++ verify_locked(dentry); ++ ++ old_bindex = bstart; ++ old_bstart = dbstart(dentry); ++ old_bend = dbend(dentry); ++ ++ BUG_ON(new_bindex < 0); ++ BUG_ON(new_bindex >= old_bindex); ++ ++ sb = dir->i_sb; ++ ++ err = is_robranch_super(sb, new_bindex); ++ if (err) ++ goto out; ++ ++ /* Create the directory structure above this dentry. */ ++ new_lower_dentry = create_parents(dir, dentry, name, new_bindex); ++ if (IS_ERR(new_lower_dentry)) { ++ err = PTR_ERR(new_lower_dentry); ++ goto out; ++ } ++ ++ old_lower_dentry = unionfs_lower_dentry_idx(dentry, old_bindex); ++ /* we conditionally dput this old_lower_dentry at end of function */ ++ dget(old_lower_dentry); ++ ++ /* For symlinks, we must read the link before we lock the directory. */ ++ if (S_ISLNK(old_lower_dentry->d_inode->i_mode)) { ++ ++ symbuf = kmalloc(PATH_MAX, GFP_KERNEL); ++ if (unlikely(!symbuf)) { ++ __clear(dentry, old_lower_dentry, ++ old_bstart, old_bend, ++ new_lower_dentry, new_bindex); ++ err = -ENOMEM; ++ goto out_free; ++ } ++ ++ oldfs = get_fs(); ++ set_fs(KERNEL_DS); ++ err = old_lower_dentry->d_inode->i_op->readlink( ++ old_lower_dentry, ++ (char __user *)symbuf, ++ PATH_MAX); ++ set_fs(oldfs); ++ if (err < 0) { ++ __clear(dentry, old_lower_dentry, ++ old_bstart, old_bend, ++ new_lower_dentry, new_bindex); ++ goto out_free; ++ } ++ symbuf[err] = '\0'; ++ } ++ ++ /* Now we lock the parent, and create the object in the new branch. */ ++ new_lower_parent_dentry = lock_parent(new_lower_dentry); ++ ++ /* create the new inode */ ++ err = __copyup_ndentry(old_lower_dentry, new_lower_dentry, ++ new_lower_parent_dentry, symbuf); ++ ++ if (err) { ++ __clear(dentry, old_lower_dentry, ++ old_bstart, old_bend, ++ new_lower_dentry, new_bindex); ++ goto out_unlock; ++ } ++ ++ /* We actually copyup the file here. */ ++ if (S_ISREG(old_lower_dentry->d_inode->i_mode)) ++ err = __copyup_reg_data(dentry, new_lower_dentry, new_bindex, ++ old_lower_dentry, old_bindex, ++ copyup_file, len); ++ if (err) ++ goto out_unlink; ++ ++ /* Set permissions. */ ++ err = copyup_permissions(sb, old_lower_dentry, new_lower_dentry); ++ if (err) ++ goto out_unlink; ++ ++#ifdef CONFIG_UNION_FS_XATTR ++ /* Selinux uses extended attributes for permissions. */ ++ err = copyup_xattrs(old_lower_dentry, new_lower_dentry); ++ if (err) ++ goto out_unlink; ++#endif /* CONFIG_UNION_FS_XATTR */ ++ ++ /* do not allow files getting deleted to be re-interposed */ ++ if (!d_deleted(dentry)) ++ unionfs_reinterpose(dentry); ++ ++ goto out_unlock; ++ ++out_unlink: ++ /* ++ * copyup failed, because we possibly ran out of space or ++ * quota, or something else happened so let's unlink; we don't ++ * really care about the return value of vfs_unlink ++ */ ++ vfs_unlink(new_lower_parent_dentry->d_inode, new_lower_dentry); ++ ++ if (copyup_file) { ++ /* need to close the file */ ++ ++ fput(*copyup_file); ++ branchput(sb, new_bindex); ++ } ++ ++ /* ++ * TODO: should we reset the error to something like -EIO? ++ * ++ * If we don't reset, the user may get some nonsensical errors, but ++ * on the other hand, if we reset to EIO, we guarantee that the user ++ * will get a "confusing" error message. ++ */ ++ ++out_unlock: ++ unlock_dir(new_lower_parent_dentry); ++ ++out_free: ++ /* ++ * If old_lower_dentry was not a file, then we need to dput it. If ++ * it was a file, then it was already dput indirectly by other ++ * functions we call above which operate on regular files. ++ */ ++ if (old_lower_dentry && old_lower_dentry->d_inode && ++ !S_ISREG(old_lower_dentry->d_inode->i_mode)) ++ dput(old_lower_dentry); ++ kfree(symbuf); ++ ++ if (err) { ++ /* ++ * if directory creation succeeded, but inode copyup failed, ++ * then purge new dentries. ++ */ ++ if (dbstart(dentry) < old_bstart && ++ ibstart(dentry->d_inode) > dbstart(dentry)) ++ __clear(dentry, NULL, old_bstart, old_bend, ++ unionfs_lower_dentry(dentry), dbstart(dentry)); ++ goto out; ++ } ++ if (!S_ISDIR(dentry->d_inode->i_mode)) { ++ unionfs_postcopyup_release(dentry); ++ if (!unionfs_lower_inode(dentry->d_inode)) { ++ /* ++ * If we got here, then we copied up to an ++ * unlinked-open file, whose name is .unionfsXXXXX. ++ */ ++ struct inode *inode = new_lower_dentry->d_inode; ++ atomic_inc(&inode->i_count); ++ unionfs_set_lower_inode_idx(dentry->d_inode, ++ ibstart(dentry->d_inode), ++ inode); ++ } ++ } ++ unionfs_postcopyup_setmnt(dentry); ++ /* sync inode times from copied-up inode to our inode */ ++ unionfs_copy_attr_times(dentry->d_inode); ++ unionfs_check_inode(dir); ++ unionfs_check_dentry(dentry); ++out: ++ return err; ++} ++ ++/* ++ * This function creates a copy of a file represented by 'file' which ++ * currently resides in branch 'bstart' to branch 'new_bindex.' The copy ++ * will be named "name". ++ */ ++int copyup_named_file(struct inode *dir, struct file *file, char *name, ++ int bstart, int new_bindex, loff_t len) ++{ ++ int err = 0; ++ struct file *output_file = NULL; ++ ++ err = copyup_dentry(dir, file->f_path.dentry, bstart, new_bindex, ++ name, strlen(name), &output_file, len); ++ if (!err) { ++ fbstart(file) = new_bindex; ++ unionfs_set_lower_file_idx(file, new_bindex, output_file); ++ } ++ ++ return err; ++} ++ ++/* ++ * This function creates a copy of a file represented by 'file' which ++ * currently resides in branch 'bstart' to branch 'new_bindex'. ++ */ ++int copyup_file(struct inode *dir, struct file *file, int bstart, ++ int new_bindex, loff_t len) ++{ ++ int err = 0; ++ struct file *output_file = NULL; ++ struct dentry *dentry = file->f_path.dentry; ++ ++ err = copyup_dentry(dir, dentry, bstart, new_bindex, ++ dentry->d_name.name, dentry->d_name.len, ++ &output_file, len); ++ if (!err) { ++ fbstart(file) = new_bindex; ++ unionfs_set_lower_file_idx(file, new_bindex, output_file); ++ } ++ ++ return err; ++} ++ ++/* purge a dentry's lower-branch states (dput/mntput, etc.) */ ++static void __cleanup_dentry(struct dentry *dentry, int bindex, ++ int old_bstart, int old_bend) ++{ ++ int loop_start; ++ int loop_end; ++ int new_bstart = -1; ++ int new_bend = -1; ++ int i; ++ ++ loop_start = min(old_bstart, bindex); ++ loop_end = max(old_bend, bindex); ++ ++ /* ++ * This loop sets the bstart and bend for the new dentry by ++ * traversing from left to right. It also dputs all negative ++ * dentries except bindex ++ */ ++ for (i = loop_start; i <= loop_end; i++) { ++ if (!unionfs_lower_dentry_idx(dentry, i)) ++ continue; ++ ++ if (i == bindex) { ++ new_bend = i; ++ if (new_bstart < 0) ++ new_bstart = i; ++ continue; ++ } ++ ++ if (!unionfs_lower_dentry_idx(dentry, i)->d_inode) { ++ dput(unionfs_lower_dentry_idx(dentry, i)); ++ unionfs_set_lower_dentry_idx(dentry, i, NULL); ++ ++ unionfs_mntput(dentry, i); ++ unionfs_set_lower_mnt_idx(dentry, i, NULL); ++ } else { ++ if (new_bstart < 0) ++ new_bstart = i; ++ new_bend = i; ++ } ++ } ++ ++ if (new_bstart < 0) ++ new_bstart = bindex; ++ if (new_bend < 0) ++ new_bend = bindex; ++ dbstart(dentry) = new_bstart; ++ dbend(dentry) = new_bend; ++ ++} ++ ++/* set lower inode ptr and update bstart & bend if necessary */ ++static void __set_inode(struct dentry *upper, struct dentry *lower, ++ int bindex) ++{ ++ unionfs_set_lower_inode_idx(upper->d_inode, bindex, ++ igrab(lower->d_inode)); ++ if (likely(ibstart(upper->d_inode) > bindex)) ++ ibstart(upper->d_inode) = bindex; ++ if (likely(ibend(upper->d_inode) < bindex)) ++ ibend(upper->d_inode) = bindex; ++ ++} ++ ++/* set lower dentry ptr and update bstart & bend if necessary */ ++static void __set_dentry(struct dentry *upper, struct dentry *lower, ++ int bindex) ++{ ++ unionfs_set_lower_dentry_idx(upper, bindex, lower); ++ if (likely(dbstart(upper) > bindex)) ++ dbstart(upper) = bindex; ++ if (likely(dbend(upper) < bindex)) ++ dbend(upper) = bindex; ++} ++ ++/* ++ * This function replicates the directory structure up-to given dentry ++ * in the bindex branch. ++ */ ++struct dentry *create_parents(struct inode *dir, struct dentry *dentry, ++ const char *name, int bindex) ++{ ++ int err; ++ struct dentry *child_dentry; ++ struct dentry *parent_dentry; ++ struct dentry *lower_parent_dentry = NULL; ++ struct dentry *lower_dentry = NULL; ++ const char *childname; ++ unsigned int childnamelen; ++ int nr_dentry; ++ int count = 0; ++ int old_bstart; ++ int old_bend; ++ struct dentry **path = NULL; ++ struct super_block *sb; ++ ++ verify_locked(dentry); ++ ++ err = is_robranch_super(dir->i_sb, bindex); ++ if (err) { ++ lower_dentry = ERR_PTR(err); ++ goto out; ++ } ++ ++ old_bstart = dbstart(dentry); ++ old_bend = dbend(dentry); ++ ++ lower_dentry = ERR_PTR(-ENOMEM); ++ ++ /* There is no sense allocating any less than the minimum. */ ++ nr_dentry = 1; ++ path = kmalloc(nr_dentry * sizeof(struct dentry *), GFP_KERNEL); ++ if (unlikely(!path)) ++ goto out; ++ ++ /* assume the negative dentry of unionfs as the parent dentry */ ++ parent_dentry = dentry; ++ ++ /* ++ * This loop finds the first parent that exists in the given branch. ++ * We start building the directory structure from there. At the end ++ * of the loop, the following should hold: ++ * - child_dentry is the first nonexistent child ++ * - parent_dentry is the first existent parent ++ * - path[0] is the = deepest child ++ * - path[count] is the first child to create ++ */ ++ do { ++ child_dentry = parent_dentry; ++ ++ /* find the parent directory dentry in unionfs */ ++ parent_dentry = dget_parent(child_dentry); ++ ++ /* find out the lower_parent_dentry in the given branch */ ++ lower_parent_dentry = ++ unionfs_lower_dentry_idx(parent_dentry, bindex); ++ ++ /* grow path table */ ++ if (count == nr_dentry) { ++ void *p; ++ ++ nr_dentry *= 2; ++ p = krealloc(path, nr_dentry * sizeof(struct dentry *), ++ GFP_KERNEL); ++ if (unlikely(!p)) { ++ lower_dentry = ERR_PTR(-ENOMEM); ++ goto out; ++ } ++ path = p; ++ } ++ ++ /* store the child dentry */ ++ path[count++] = child_dentry; ++ } while (!lower_parent_dentry); ++ count--; ++ ++ sb = dentry->d_sb; ++ ++ /* ++ * This code goes between the begin/end labels and basically ++ * emulates a while(child_dentry != dentry), only cleaner and ++ * shorter than what would be a much longer while loop. ++ */ ++begin: ++ /* get lower parent dir in the current branch */ ++ lower_parent_dentry = unionfs_lower_dentry_idx(parent_dentry, bindex); ++ dput(parent_dentry); ++ ++ /* init the values to lookup */ ++ childname = child_dentry->d_name.name; ++ childnamelen = child_dentry->d_name.len; ++ ++ if (child_dentry != dentry) { ++ /* lookup child in the underlying file system */ ++ lower_dentry = lookup_lck_len(childname, lower_parent_dentry, ++ childnamelen); ++ if (IS_ERR(lower_dentry)) ++ goto out; ++ } else { ++ /* ++ * Is the name a whiteout of the child name ? lookup the ++ * whiteout child in the underlying file system ++ */ ++ lower_dentry = lookup_lck_len(name, lower_parent_dentry, ++ strlen(name)); ++ if (IS_ERR(lower_dentry)) ++ goto out; ++ ++ /* Replace the current dentry (if any) with the new one */ ++ dput(unionfs_lower_dentry_idx(dentry, bindex)); ++ unionfs_set_lower_dentry_idx(dentry, bindex, ++ lower_dentry); ++ ++ __cleanup_dentry(dentry, bindex, old_bstart, old_bend); ++ goto out; ++ } ++ ++ if (lower_dentry->d_inode) { ++ /* ++ * since this already exists we dput to avoid ++ * multiple references on the same dentry ++ */ ++ dput(lower_dentry); ++ } else { ++ struct sioq_args args; ++ ++ /* it's a negative dentry, create a new dir */ ++ lower_parent_dentry = lock_parent(lower_dentry); ++ ++ args.mkdir.parent = lower_parent_dentry->d_inode; ++ args.mkdir.dentry = lower_dentry; ++ args.mkdir.mode = child_dentry->d_inode->i_mode; ++ ++ run_sioq(__unionfs_mkdir, &args); ++ err = args.err; ++ ++ if (!err) ++ err = copyup_permissions(dir->i_sb, child_dentry, ++ lower_dentry); ++ unlock_dir(lower_parent_dentry); ++ if (err) { ++ dput(lower_dentry); ++ lower_dentry = ERR_PTR(err); ++ goto out; ++ } ++ ++ } ++ ++ __set_inode(child_dentry, lower_dentry, bindex); ++ __set_dentry(child_dentry, lower_dentry, bindex); ++ /* ++ * update times of this dentry, but also the parent, because if ++ * we changed, the parent may have changed too. ++ */ ++ fsstack_copy_attr_times(parent_dentry->d_inode, ++ lower_parent_dentry->d_inode); ++ unionfs_copy_attr_times(child_dentry->d_inode); ++ ++ parent_dentry = child_dentry; ++ child_dentry = path[--count]; ++ goto begin; ++out: ++ /* cleanup any leftover locks from the do/while loop above */ ++ if (IS_ERR(lower_dentry)) ++ while (count) ++ dput(path[count--]); ++ kfree(path); ++ return lower_dentry; ++} ++ ++/* ++ * Post-copyup helper to ensure we have valid mnts: set lower mnt of ++ * dentry+parents to the first parent node that has an mnt. ++ */ ++void unionfs_postcopyup_setmnt(struct dentry *dentry) ++{ ++ struct dentry *parent, *hasone; ++ int bindex = dbstart(dentry); ++ ++ if (unionfs_lower_mnt_idx(dentry, bindex)) ++ return; ++ hasone = dentry->d_parent; ++ /* this loop should stop at root dentry */ ++ while (!unionfs_lower_mnt_idx(hasone, bindex)) ++ hasone = hasone->d_parent; ++ parent = dentry; ++ while (!unionfs_lower_mnt_idx(parent, bindex)) { ++ unionfs_set_lower_mnt_idx(parent, bindex, ++ unionfs_mntget(hasone, bindex)); ++ parent = parent->d_parent; ++ } ++} ++ ++/* ++ * Post-copyup helper to release all non-directory source objects of a ++ * copied-up file. Regular files should have only one lower object. ++ */ ++void unionfs_postcopyup_release(struct dentry *dentry) ++{ ++ int bstart, bend; ++ ++ BUG_ON(S_ISDIR(dentry->d_inode->i_mode)); ++ bstart = dbstart(dentry); ++ bend = dbend(dentry); ++ ++ path_put_lowers(dentry, bstart + 1, bend, false); ++ iput_lowers(dentry->d_inode, bstart + 1, bend, false); ++ ++ dbend(dentry) = bstart; ++ ibend(dentry->d_inode) = ibstart(dentry->d_inode) = bstart; ++} +diff --git a/fs/unionfs/debug.c b/fs/unionfs/debug.c +new file mode 100644 +index 0000000..100d2c6 +--- /dev/null ++++ b/fs/unionfs/debug.c +@@ -0,0 +1,532 @@ ++/* ++ * Copyright (c) 2003-2010 Erez Zadok ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2003-2010 Stony Brook University ++ * Copyright (c) 2003-2010 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++ ++/* ++ * Helper debugging functions for maintainers (and for users to report back ++ * useful information back to maintainers) ++ */ ++ ++/* it's always useful to know what part of the code called us */ ++#define PRINT_CALLER(fname, fxn, line) \ ++ do { \ ++ if (!printed_caller) { \ ++ pr_debug("PC:%s:%s:%d\n", (fname), (fxn), (line)); \ ++ printed_caller = 1; \ ++ } \ ++ } while (0) ++ ++/* ++ * __unionfs_check_{inode,dentry,file} perform exhaustive sanity checking on ++ * the fan-out of various Unionfs objects. We check that no lower objects ++ * exist outside the start/end branch range; that all objects within are ++ * non-NULL (with some allowed exceptions); that for every lower file ++ * there's a lower dentry+inode; that the start/end ranges match for all ++ * corresponding lower objects; that open files/symlinks have only one lower ++ * objects, but directories can have several; and more. ++ */ ++void __unionfs_check_inode(const struct inode *inode, ++ const char *fname, const char *fxn, int line) ++{ ++ int bindex; ++ int istart, iend; ++ struct inode *lower_inode; ++ struct super_block *sb; ++ int printed_caller = 0; ++ void *poison_ptr; ++ ++ /* for inodes now */ ++ BUG_ON(!inode); ++ sb = inode->i_sb; ++ istart = ibstart(inode); ++ iend = ibend(inode); ++ /* don't check inode if no lower branches */ ++ if (istart < 0 && iend < 0) ++ return; ++ if (unlikely(istart > iend)) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" Ci0: inode=%p istart/end=%d:%d\n", ++ inode, istart, iend); ++ } ++ if (unlikely((istart == -1 && iend != -1) || ++ (istart != -1 && iend == -1))) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" Ci1: inode=%p istart/end=%d:%d\n", ++ inode, istart, iend); ++ } ++ if (!S_ISDIR(inode->i_mode)) { ++ if (unlikely(iend != istart)) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" Ci2: inode=%p istart=%d iend=%d\n", ++ inode, istart, iend); ++ } ++ } ++ ++ for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) { ++ if (unlikely(!UNIONFS_I(inode))) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" Ci3: no inode_info %p\n", inode); ++ return; ++ } ++ if (unlikely(!UNIONFS_I(inode)->lower_inodes)) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" Ci4: no lower_inodes %p\n", inode); ++ return; ++ } ++ lower_inode = unionfs_lower_inode_idx(inode, bindex); ++ if (lower_inode) { ++ memset(&poison_ptr, POISON_INUSE, sizeof(void *)); ++ if (unlikely(bindex < istart || bindex > iend)) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" Ci5: inode/linode=%p:%p bindex=%d " ++ "istart/end=%d:%d\n", inode, ++ lower_inode, bindex, istart, iend); ++ } else if (unlikely(lower_inode == poison_ptr)) { ++ /* freed inode! */ ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" Ci6: inode/linode=%p:%p bindex=%d " ++ "istart/end=%d:%d\n", inode, ++ lower_inode, bindex, istart, iend); ++ } ++ continue; ++ } ++ /* if we get here, then lower_inode == NULL */ ++ if (bindex < istart || bindex > iend) ++ continue; ++ /* ++ * directories can have NULL lower inodes in b/t start/end, ++ * but NOT if at the start/end range. ++ */ ++ if (unlikely(S_ISDIR(inode->i_mode) && ++ bindex > istart && bindex < iend)) ++ continue; ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" Ci7: inode/linode=%p:%p " ++ "bindex=%d istart/end=%d:%d\n", ++ inode, lower_inode, bindex, istart, iend); ++ } ++} ++ ++void __unionfs_check_dentry(const struct dentry *dentry, ++ const char *fname, const char *fxn, int line) ++{ ++ int bindex; ++ int dstart, dend, istart, iend; ++ struct dentry *lower_dentry; ++ struct inode *inode, *lower_inode; ++ struct super_block *sb; ++ struct vfsmount *lower_mnt; ++ int printed_caller = 0; ++ void *poison_ptr; ++ ++ BUG_ON(!dentry); ++ sb = dentry->d_sb; ++ inode = dentry->d_inode; ++ dstart = dbstart(dentry); ++ dend = dbend(dentry); ++ /* don't check dentry/mnt if no lower branches */ ++ if (dstart < 0 && dend < 0) ++ goto check_inode; ++ BUG_ON(dstart > dend); ++ ++ if (unlikely((dstart == -1 && dend != -1) || ++ (dstart != -1 && dend == -1))) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" CD0: dentry=%p dstart/end=%d:%d\n", ++ dentry, dstart, dend); ++ } ++ /* ++ * check for NULL dentries inside the start/end range, or ++ * non-NULL dentries outside the start/end range. ++ */ ++ for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) { ++ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ if (lower_dentry) { ++ if (unlikely(bindex < dstart || bindex > dend)) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" CD1: dentry/lower=%p:%p(%p) " ++ "bindex=%d dstart/end=%d:%d\n", ++ dentry, lower_dentry, ++ (lower_dentry ? lower_dentry->d_inode : ++ (void *) -1L), ++ bindex, dstart, dend); ++ } ++ } else { /* lower_dentry == NULL */ ++ if (bindex < dstart || bindex > dend) ++ continue; ++ /* ++ * Directories can have NULL lower inodes in b/t ++ * start/end, but NOT if at the start/end range. ++ * Ignore this rule, however, if this is a NULL ++ * dentry or a deleted dentry. ++ */ ++ if (unlikely(!d_deleted((struct dentry *) dentry) && ++ inode && ++ !(inode && S_ISDIR(inode->i_mode) && ++ bindex > dstart && bindex < dend))) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" CD2: dentry/lower=%p:%p(%p) " ++ "bindex=%d dstart/end=%d:%d\n", ++ dentry, lower_dentry, ++ (lower_dentry ? ++ lower_dentry->d_inode : ++ (void *) -1L), ++ bindex, dstart, dend); ++ } ++ } ++ } ++ ++ /* check for vfsmounts same as for dentries */ ++ for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) { ++ lower_mnt = unionfs_lower_mnt_idx(dentry, bindex); ++ if (lower_mnt) { ++ if (unlikely(bindex < dstart || bindex > dend)) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" CM0: dentry/lmnt=%p:%p bindex=%d " ++ "dstart/end=%d:%d\n", dentry, ++ lower_mnt, bindex, dstart, dend); ++ } ++ } else { /* lower_mnt == NULL */ ++ if (bindex < dstart || bindex > dend) ++ continue; ++ /* ++ * Directories can have NULL lower inodes in b/t ++ * start/end, but NOT if at the start/end range. ++ * Ignore this rule, however, if this is a NULL ++ * dentry. ++ */ ++ if (unlikely(inode && ++ !(inode && S_ISDIR(inode->i_mode) && ++ bindex > dstart && bindex < dend))) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" CM1: dentry/lmnt=%p:%p " ++ "bindex=%d dstart/end=%d:%d\n", ++ dentry, lower_mnt, bindex, ++ dstart, dend); ++ } ++ } ++ } ++ ++check_inode: ++ /* for inodes now */ ++ if (!inode) ++ return; ++ istart = ibstart(inode); ++ iend = ibend(inode); ++ /* don't check inode if no lower branches */ ++ if (istart < 0 && iend < 0) ++ return; ++ BUG_ON(istart > iend); ++ if (unlikely((istart == -1 && iend != -1) || ++ (istart != -1 && iend == -1))) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" CI0: dentry/inode=%p:%p istart/end=%d:%d\n", ++ dentry, inode, istart, iend); ++ } ++ if (unlikely(istart != dstart)) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" CI1: dentry/inode=%p:%p istart=%d dstart=%d\n", ++ dentry, inode, istart, dstart); ++ } ++ if (unlikely(iend != dend)) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" CI2: dentry/inode=%p:%p iend=%d dend=%d\n", ++ dentry, inode, iend, dend); ++ } ++ ++ if (!S_ISDIR(inode->i_mode)) { ++ if (unlikely(dend != dstart)) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" CI3: dentry/inode=%p:%p dstart=%d dend=%d\n", ++ dentry, inode, dstart, dend); ++ } ++ if (unlikely(iend != istart)) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" CI4: dentry/inode=%p:%p istart=%d iend=%d\n", ++ dentry, inode, istart, iend); ++ } ++ } ++ ++ for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) { ++ lower_inode = unionfs_lower_inode_idx(inode, bindex); ++ if (lower_inode) { ++ memset(&poison_ptr, POISON_INUSE, sizeof(void *)); ++ if (unlikely(bindex < istart || bindex > iend)) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" CI5: dentry/linode=%p:%p bindex=%d " ++ "istart/end=%d:%d\n", dentry, ++ lower_inode, bindex, istart, iend); ++ } else if (unlikely(lower_inode == poison_ptr)) { ++ /* freed inode! */ ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" CI6: dentry/linode=%p:%p bindex=%d " ++ "istart/end=%d:%d\n", dentry, ++ lower_inode, bindex, istart, iend); ++ } ++ continue; ++ } ++ /* if we get here, then lower_inode == NULL */ ++ if (bindex < istart || bindex > iend) ++ continue; ++ /* ++ * directories can have NULL lower inodes in b/t start/end, ++ * but NOT if at the start/end range. ++ */ ++ if (unlikely(S_ISDIR(inode->i_mode) && ++ bindex > istart && bindex < iend)) ++ continue; ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" CI7: dentry/linode=%p:%p " ++ "bindex=%d istart/end=%d:%d\n", ++ dentry, lower_inode, bindex, istart, iend); ++ } ++ ++ /* ++ * If it's a directory, then intermediate objects b/t start/end can ++ * be NULL. But, check that all three are NULL: lower dentry, mnt, ++ * and inode. ++ */ ++ if (dstart >= 0 && dend >= 0 && S_ISDIR(inode->i_mode)) ++ for (bindex = dstart+1; bindex < dend; bindex++) { ++ lower_inode = unionfs_lower_inode_idx(inode, bindex); ++ lower_dentry = unionfs_lower_dentry_idx(dentry, ++ bindex); ++ lower_mnt = unionfs_lower_mnt_idx(dentry, bindex); ++ if (unlikely(!((lower_inode && lower_dentry && ++ lower_mnt) || ++ (!lower_inode && ++ !lower_dentry && !lower_mnt)))) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" Cx: lmnt/ldentry/linode=%p:%p:%p " ++ "bindex=%d dstart/end=%d:%d\n", ++ lower_mnt, lower_dentry, lower_inode, ++ bindex, dstart, dend); ++ } ++ } ++ /* check if lower inode is newer than upper one (it shouldn't) */ ++ if (unlikely(is_newer_lower(dentry) && !is_negative_lower(dentry))) { ++ PRINT_CALLER(fname, fxn, line); ++ for (bindex = ibstart(inode); bindex <= ibend(inode); ++ bindex++) { ++ lower_inode = unionfs_lower_inode_idx(inode, bindex); ++ if (unlikely(!lower_inode)) ++ continue; ++ pr_debug(" CI8: bindex=%d mtime/lmtime=%lu.%lu/%lu.%lu " ++ "ctime/lctime=%lu.%lu/%lu.%lu\n", ++ bindex, ++ inode->i_mtime.tv_sec, ++ inode->i_mtime.tv_nsec, ++ lower_inode->i_mtime.tv_sec, ++ lower_inode->i_mtime.tv_nsec, ++ inode->i_ctime.tv_sec, ++ inode->i_ctime.tv_nsec, ++ lower_inode->i_ctime.tv_sec, ++ lower_inode->i_ctime.tv_nsec); ++ } ++ } ++} ++ ++void __unionfs_check_file(const struct file *file, ++ const char *fname, const char *fxn, int line) ++{ ++ int bindex; ++ int dstart, dend, fstart, fend; ++ struct dentry *dentry; ++ struct file *lower_file; ++ struct inode *inode; ++ struct super_block *sb; ++ int printed_caller = 0; ++ ++ BUG_ON(!file); ++ dentry = file->f_path.dentry; ++ sb = dentry->d_sb; ++ dstart = dbstart(dentry); ++ dend = dbend(dentry); ++ BUG_ON(dstart > dend); ++ fstart = fbstart(file); ++ fend = fbend(file); ++ BUG_ON(fstart > fend); ++ ++ if (unlikely((fstart == -1 && fend != -1) || ++ (fstart != -1 && fend == -1))) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" CF0: file/dentry=%p:%p fstart/end=%d:%d\n", ++ file, dentry, fstart, fend); ++ } ++ if (unlikely(fstart != dstart)) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" CF1: file/dentry=%p:%p fstart=%d dstart=%d\n", ++ file, dentry, fstart, dstart); ++ } ++ if (unlikely(fend != dend)) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" CF2: file/dentry=%p:%p fend=%d dend=%d\n", ++ file, dentry, fend, dend); ++ } ++ inode = dentry->d_inode; ++ if (!S_ISDIR(inode->i_mode)) { ++ if (unlikely(fend != fstart)) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" CF3: file/inode=%p:%p fstart=%d fend=%d\n", ++ file, inode, fstart, fend); ++ } ++ if (unlikely(dend != dstart)) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" CF4: file/dentry=%p:%p dstart=%d dend=%d\n", ++ file, dentry, dstart, dend); ++ } ++ } ++ ++ /* ++ * check for NULL dentries inside the start/end range, or ++ * non-NULL dentries outside the start/end range. ++ */ ++ for (bindex = sbstart(sb); bindex < sbmax(sb); bindex++) { ++ lower_file = unionfs_lower_file_idx(file, bindex); ++ if (lower_file) { ++ if (unlikely(bindex < fstart || bindex > fend)) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" CF5: file/lower=%p:%p bindex=%d " ++ "fstart/end=%d:%d\n", file, ++ lower_file, bindex, fstart, fend); ++ } ++ } else { /* lower_file == NULL */ ++ if (bindex >= fstart && bindex <= fend) { ++ /* ++ * directories can have NULL lower inodes in ++ * b/t start/end, but NOT if at the ++ * start/end range. ++ */ ++ if (unlikely(!(S_ISDIR(inode->i_mode) && ++ bindex > fstart && ++ bindex < fend))) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" CF6: file/lower=%p:%p " ++ "bindex=%d fstart/end=%d:%d\n", ++ file, lower_file, bindex, ++ fstart, fend); ++ } ++ } ++ } ++ } ++ ++ __unionfs_check_dentry(dentry, fname, fxn, line); ++} ++ ++void __unionfs_check_nd(const struct nameidata *nd, ++ const char *fname, const char *fxn, int line) ++{ ++ struct file *file; ++ int printed_caller = 0; ++ ++ if (unlikely(!nd)) ++ return; ++ if (nd->flags & LOOKUP_OPEN) { ++ file = nd->intent.open.file; ++ if (unlikely(file->f_path.dentry && ++ strcmp(file->f_path.dentry->d_sb->s_type->name, ++ UNIONFS_NAME))) { ++ PRINT_CALLER(fname, fxn, line); ++ pr_debug(" CND1: lower_file of type %s\n", ++ file->f_path.dentry->d_sb->s_type->name); ++ } ++ } ++} ++ ++/* useful to track vfsmount leaks that could cause EBUSY on unmount */ ++void __show_branch_counts(const struct super_block *sb, ++ const char *file, const char *fxn, int line) ++{ ++ int i; ++ struct vfsmount *mnt; ++ ++ pr_debug("BC:"); ++ for (i = 0; i < sbmax(sb); i++) { ++ if (likely(sb->s_root)) ++ mnt = UNIONFS_D(sb->s_root)->lower_paths[i].mnt; ++ else ++ mnt = NULL; ++ printk(KERN_CONT "%d:", ++ (mnt ? atomic_read(&mnt->mnt_count) : -99)); ++ } ++ printk(KERN_CONT "%s:%s:%d\n", file, fxn, line); ++} ++ ++void __show_inode_times(const struct inode *inode, ++ const char *file, const char *fxn, int line) ++{ ++ struct inode *lower_inode; ++ int bindex; ++ ++ for (bindex = ibstart(inode); bindex <= ibend(inode); bindex++) { ++ lower_inode = unionfs_lower_inode_idx(inode, bindex); ++ if (unlikely(!lower_inode)) ++ continue; ++ pr_debug("IT(%lu:%d): %s:%s:%d " ++ "um=%lu/%lu lm=%lu/%lu uc=%lu/%lu lc=%lu/%lu\n", ++ inode->i_ino, bindex, ++ file, fxn, line, ++ inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec, ++ lower_inode->i_mtime.tv_sec, ++ lower_inode->i_mtime.tv_nsec, ++ inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, ++ lower_inode->i_ctime.tv_sec, ++ lower_inode->i_ctime.tv_nsec); ++ } ++} ++ ++void __show_dinode_times(const struct dentry *dentry, ++ const char *file, const char *fxn, int line) ++{ ++ struct inode *inode = dentry->d_inode; ++ struct inode *lower_inode; ++ int bindex; ++ ++ for (bindex = ibstart(inode); bindex <= ibend(inode); bindex++) { ++ lower_inode = unionfs_lower_inode_idx(inode, bindex); ++ if (!lower_inode) ++ continue; ++ pr_debug("DT(%s:%lu:%d): %s:%s:%d " ++ "um=%lu/%lu lm=%lu/%lu uc=%lu/%lu lc=%lu/%lu\n", ++ dentry->d_name.name, inode->i_ino, bindex, ++ file, fxn, line, ++ inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec, ++ lower_inode->i_mtime.tv_sec, ++ lower_inode->i_mtime.tv_nsec, ++ inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, ++ lower_inode->i_ctime.tv_sec, ++ lower_inode->i_ctime.tv_nsec); ++ } ++} ++ ++void __show_inode_counts(const struct inode *inode, ++ const char *file, const char *fxn, int line) ++{ ++ struct inode *lower_inode; ++ int bindex; ++ ++ if (unlikely(!inode)) { ++ pr_debug("SiC: Null inode\n"); ++ return; ++ } ++ for (bindex = sbstart(inode->i_sb); bindex <= sbend(inode->i_sb); ++ bindex++) { ++ lower_inode = unionfs_lower_inode_idx(inode, bindex); ++ if (unlikely(!lower_inode)) ++ continue; ++ pr_debug("SIC(%lu:%d:%d): lc=%d %s:%s:%d\n", ++ inode->i_ino, bindex, ++ atomic_read(&(inode)->i_count), ++ atomic_read(&(lower_inode)->i_count), ++ file, fxn, line); ++ } ++} +diff --git a/fs/unionfs/dentry.c b/fs/unionfs/dentry.c +new file mode 100644 +index 0000000..a0c3bba +--- /dev/null ++++ b/fs/unionfs/dentry.c +@@ -0,0 +1,397 @@ ++/* ++ * Copyright (c) 2003-2010 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2010 Stony Brook University ++ * Copyright (c) 2003-2010 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++ ++bool is_negative_lower(const struct dentry *dentry) ++{ ++ int bindex; ++ struct dentry *lower_dentry; ++ ++ BUG_ON(!dentry); ++ /* cache coherency: check if file was deleted on lower branch */ ++ if (dbstart(dentry) < 0) ++ return true; ++ for (bindex = dbstart(dentry); bindex <= dbend(dentry); bindex++) { ++ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ /* unhashed (i.e., unlinked) lower dentries don't count */ ++ if (lower_dentry && lower_dentry->d_inode && ++ !d_deleted(lower_dentry) && ++ !(lower_dentry->d_flags & DCACHE_NFSFS_RENAMED)) ++ return false; ++ } ++ return true; ++} ++ ++static inline void __dput_lowers(struct dentry *dentry, int start, int end) ++{ ++ struct dentry *lower_dentry; ++ int bindex; ++ ++ if (start < 0) ++ return; ++ for (bindex = start; bindex <= end; bindex++) { ++ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ if (!lower_dentry) ++ continue; ++ unionfs_set_lower_dentry_idx(dentry, bindex, NULL); ++ dput(lower_dentry); ++ } ++} ++ ++/* ++ * Purge and invalidate as many data pages of a unionfs inode. This is ++ * called when the lower inode has changed, and we want to force processes ++ * to re-get the new data. ++ */ ++static inline void purge_inode_data(struct inode *inode) ++{ ++ /* remove all non-private mappings */ ++ unmap_mapping_range(inode->i_mapping, 0, 0, 0); ++ /* invalidate as many pages as possible */ ++ invalidate_mapping_pages(inode->i_mapping, 0, -1); ++ /* ++ * Don't try to truncate_inode_pages here, because this could lead ++ * to a deadlock between some of address_space ops and dentry ++ * revalidation: the address space op is invoked with a lock on our ++ * own page, and truncate_inode_pages will block on locked pages. ++ */ ++} ++ ++/* ++ * Revalidate a single file/symlink/special dentry. Assume that info nodes ++ * of the @dentry and its @parent are locked. Assume parent is valid, ++ * otherwise return false (and let's hope the VFS will try to re-lookup this ++ * dentry). Returns true if valid, false otherwise. ++ */ ++bool __unionfs_d_revalidate(struct dentry *dentry, struct dentry *parent, ++ bool willwrite) ++{ ++ bool valid = true; /* default is valid */ ++ struct dentry *lower_dentry; ++ struct dentry *result; ++ int bindex, bstart, bend; ++ int sbgen, dgen, pdgen; ++ int positive = 0; ++ int interpose_flag; ++ ++ verify_locked(dentry); ++ verify_locked(parent); ++ ++ /* if the dentry is unhashed, do NOT revalidate */ ++ if (d_deleted(dentry)) ++ goto out; ++ ++ dgen = atomic_read(&UNIONFS_D(dentry)->generation); ++ ++ if (is_newer_lower(dentry)) { ++ /* root dentry is always valid */ ++ if (IS_ROOT(dentry)) { ++ unionfs_copy_attr_times(dentry->d_inode); ++ } else { ++ /* ++ * reset generation number to zero, guaranteed to be ++ * "old" ++ */ ++ dgen = 0; ++ atomic_set(&UNIONFS_D(dentry)->generation, dgen); ++ } ++ if (!willwrite) ++ purge_inode_data(dentry->d_inode); ++ } ++ ++ sbgen = atomic_read(&UNIONFS_SB(dentry->d_sb)->generation); ++ ++ BUG_ON(dbstart(dentry) == -1); ++ if (dentry->d_inode) ++ positive = 1; ++ ++ /* if our dentry is valid, then validate all lower ones */ ++ if (sbgen == dgen) ++ goto validate_lowers; ++ ++ /* The root entry should always be valid */ ++ BUG_ON(IS_ROOT(dentry)); ++ ++ /* We can't work correctly if our parent isn't valid. */ ++ pdgen = atomic_read(&UNIONFS_D(parent)->generation); ++ ++ /* Free the pointers for our inodes and this dentry. */ ++ path_put_lowers_all(dentry, false); ++ ++ interpose_flag = INTERPOSE_REVAL_NEG; ++ if (positive) { ++ interpose_flag = INTERPOSE_REVAL; ++ iput_lowers_all(dentry->d_inode, true); ++ } ++ ++ if (realloc_dentry_private_data(dentry) != 0) { ++ valid = false; ++ goto out; ++ } ++ ++ result = unionfs_lookup_full(dentry, parent, interpose_flag); ++ if (result) { ++ if (IS_ERR(result)) { ++ valid = false; ++ goto out; ++ } ++ /* ++ * current unionfs_lookup_backend() doesn't return ++ * a valid dentry ++ */ ++ dput(dentry); ++ dentry = result; ++ } ++ ++ if (unlikely(positive && is_negative_lower(dentry))) { ++ /* call make_bad_inode here ? */ ++ d_drop(dentry); ++ valid = false; ++ goto out; ++ } ++ ++ /* ++ * if we got here then we have revalidated our dentry and all lower ++ * ones, so we can return safely. ++ */ ++ if (!valid) /* lower dentry revalidation failed */ ++ goto out; ++ ++ /* ++ * If the parent's gen no. matches the superblock's gen no., then ++ * we can update our denty's gen no. If they didn't match, then it ++ * was OK to revalidate this dentry with a stale parent, but we'll ++ * purposely not update our dentry's gen no. (so it can be redone); ++ * and, we'll mark our parent dentry as invalid so it'll force it ++ * (and our dentry) to be revalidated. ++ */ ++ if (pdgen == sbgen) ++ atomic_set(&UNIONFS_D(dentry)->generation, sbgen); ++ goto out; ++ ++validate_lowers: ++ ++ /* The revalidation must occur across all branches */ ++ bstart = dbstart(dentry); ++ bend = dbend(dentry); ++ BUG_ON(bstart == -1); ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ if (!lower_dentry || !lower_dentry->d_op ++ || !lower_dentry->d_op->d_revalidate) ++ continue; ++ /* ++ * Don't pass nameidata to lower file system, because we ++ * don't want an arbitrary lower file being opened or ++ * returned to us: it may be useless to us because of the ++ * fanout nature of unionfs (cf. file/directory open-file ++ * invariants). We will open lower files as and when needed ++ * later on. ++ */ ++ if (!lower_dentry->d_op->d_revalidate(lower_dentry, NULL)) ++ valid = false; ++ } ++ ++ if (!dentry->d_inode || ++ ibstart(dentry->d_inode) < 0 || ++ ibend(dentry->d_inode) < 0) { ++ valid = false; ++ goto out; ++ } ++ ++ if (valid) { ++ /* ++ * If we get here, and we copy the meta-data from the lower ++ * inode to our inode, then it is vital that we have already ++ * purged all unionfs-level file data. We do that in the ++ * caller (__unionfs_d_revalidate) by calling ++ * purge_inode_data. ++ */ ++ unionfs_copy_attr_all(dentry->d_inode, ++ unionfs_lower_inode(dentry->d_inode)); ++ fsstack_copy_inode_size(dentry->d_inode, ++ unionfs_lower_inode(dentry->d_inode)); ++ } ++ ++out: ++ return valid; ++} ++ ++/* ++ * Determine if the lower inode objects have changed from below the unionfs ++ * inode. Return true if changed, false otherwise. ++ * ++ * We check if the mtime or ctime have changed. However, the inode times ++ * can be changed by anyone without much protection, including ++ * asynchronously. This can sometimes cause unionfs to find that the lower ++ * file system doesn't change its inode times quick enough, resulting in a ++ * false positive indication (which is harmless, it just makes unionfs do ++ * extra work in re-validating the objects). To minimize the chances of ++ * these situations, we still consider such small time changes valid, but we ++ * don't print debugging messages unless the time changes are greater than ++ * UNIONFS_MIN_CC_TIME (which defaults to 3 seconds, as with NFS's acregmin) ++ * because significant changes are more likely due to users manually ++ * touching lower files. ++ */ ++bool is_newer_lower(const struct dentry *dentry) ++{ ++ int bindex; ++ struct inode *inode; ++ struct inode *lower_inode; ++ ++ /* ignore if we're called on semi-initialized dentries/inodes */ ++ if (!dentry || !UNIONFS_D(dentry)) ++ return false; ++ inode = dentry->d_inode; ++ if (!inode || !UNIONFS_I(inode)->lower_inodes || ++ ibstart(inode) < 0 || ibend(inode) < 0) ++ return false; ++ ++ for (bindex = ibstart(inode); bindex <= ibend(inode); bindex++) { ++ lower_inode = unionfs_lower_inode_idx(inode, bindex); ++ if (!lower_inode) ++ continue; ++ ++ /* check if mtime/ctime have changed */ ++ if (unlikely(timespec_compare(&inode->i_mtime, ++ &lower_inode->i_mtime) < 0)) { ++ if ((lower_inode->i_mtime.tv_sec - ++ inode->i_mtime.tv_sec) > UNIONFS_MIN_CC_TIME) { ++ pr_info("unionfs: new lower inode mtime " ++ "(bindex=%d, name=%s)\n", bindex, ++ dentry->d_name.name); ++ show_dinode_times(dentry); ++ } ++ return true; ++ } ++ if (unlikely(timespec_compare(&inode->i_ctime, ++ &lower_inode->i_ctime) < 0)) { ++ if ((lower_inode->i_ctime.tv_sec - ++ inode->i_ctime.tv_sec) > UNIONFS_MIN_CC_TIME) { ++ pr_info("unionfs: new lower inode ctime " ++ "(bindex=%d, name=%s)\n", bindex, ++ dentry->d_name.name); ++ show_dinode_times(dentry); ++ } ++ return true; ++ } ++ } ++ ++ /* ++ * Last check: if this is a positive dentry, but somehow all lower ++ * dentries are negative or unhashed, then this dentry needs to be ++ * revalidated, because someone probably deleted the objects from ++ * the lower branches directly. ++ */ ++ if (is_negative_lower(dentry)) ++ return true; ++ ++ return false; /* default: lower is not newer */ ++} ++ ++static int unionfs_d_revalidate(struct dentry *dentry, ++ struct nameidata *nd_unused) ++{ ++ bool valid = true; ++ int err = 1; /* 1 means valid for the VFS */ ++ struct dentry *parent; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD); ++ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ valid = __unionfs_d_revalidate(dentry, parent, false); ++ if (valid) { ++ unionfs_postcopyup_setmnt(dentry); ++ unionfs_check_dentry(dentry); ++ } else { ++ d_drop(dentry); ++ err = valid; ++ } ++ unionfs_unlock_dentry(dentry); ++ unionfs_unlock_parent(dentry, parent); ++ unionfs_read_unlock(dentry->d_sb); ++ ++ return err; ++} ++ ++static void unionfs_d_release(struct dentry *dentry) ++{ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD); ++ if (unlikely(!UNIONFS_D(dentry))) ++ goto out; /* skip if no lower branches */ ++ /* must lock our branch configuration here */ ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ unionfs_check_dentry(dentry); ++ /* this could be a negative dentry, so check first */ ++ if (dbstart(dentry) < 0) { ++ unionfs_unlock_dentry(dentry); ++ goto out; /* due to a (normal) failed lookup */ ++ } ++ ++ /* Release all the lower dentries */ ++ path_put_lowers_all(dentry, true); ++ ++ unionfs_unlock_dentry(dentry); ++ ++out: ++ free_dentry_private_data(dentry); ++ unionfs_read_unlock(dentry->d_sb); ++ return; ++} ++ ++/* ++ * Called when we're removing the last reference to our dentry. So we ++ * should drop all lower references too. ++ */ ++static void unionfs_d_iput(struct dentry *dentry, struct inode *inode) ++{ ++ int rc; ++ ++ BUG_ON(!dentry); ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ if (!UNIONFS_D(dentry) || dbstart(dentry) < 0) ++ goto drop_lower_inodes; ++ path_put_lowers_all(dentry, false); ++ ++drop_lower_inodes: ++ rc = atomic_read(&inode->i_count); ++ if (rc == 1 && inode->i_nlink == 1 && ibstart(inode) >= 0) { ++ /* see Documentation/filesystems/unionfs/issues.txt */ ++ lockdep_off(); ++ iput(unionfs_lower_inode(inode)); ++ lockdep_on(); ++ unionfs_set_lower_inode(inode, NULL); ++ /* XXX: may need to set start/end to -1? */ ++ } ++ ++ iput(inode); ++ ++ unionfs_unlock_dentry(dentry); ++ unionfs_read_unlock(dentry->d_sb); ++} ++ ++struct dentry_operations unionfs_dops = { ++ .d_revalidate = unionfs_d_revalidate, ++ .d_release = unionfs_d_release, ++ .d_iput = unionfs_d_iput, ++}; +diff --git a/fs/unionfs/dirfops.c b/fs/unionfs/dirfops.c +new file mode 100644 +index 0000000..7da0ff0 +--- /dev/null ++++ b/fs/unionfs/dirfops.c +@@ -0,0 +1,302 @@ ++/* ++ * Copyright (c) 2003-2010 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2010 Stony Brook University ++ * Copyright (c) 2003-2010 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++ ++/* Make sure our rdstate is playing by the rules. */ ++static void verify_rdstate_offset(struct unionfs_dir_state *rdstate) ++{ ++ BUG_ON(rdstate->offset >= DIREOF); ++ BUG_ON(rdstate->cookie >= MAXRDCOOKIE); ++} ++ ++struct unionfs_getdents_callback { ++ struct unionfs_dir_state *rdstate; ++ void *dirent; ++ int entries_written; ++ int filldir_called; ++ int filldir_error; ++ filldir_t filldir; ++ struct super_block *sb; ++}; ++ ++/* based on generic filldir in fs/readir.c */ ++static int unionfs_filldir(void *dirent, const char *oname, int namelen, ++ loff_t offset, u64 ino, unsigned int d_type) ++{ ++ struct unionfs_getdents_callback *buf = dirent; ++ struct filldir_node *found = NULL; ++ int err = 0; ++ int is_whiteout; ++ char *name = (char *) oname; ++ ++ buf->filldir_called++; ++ ++ is_whiteout = is_whiteout_name(&name, &namelen); ++ ++ found = find_filldir_node(buf->rdstate, name, namelen, is_whiteout); ++ ++ if (found) { ++ /* ++ * If we had non-whiteout entry in dir cache, then mark it ++ * as a whiteout and but leave it in the dir cache. ++ */ ++ if (is_whiteout && !found->whiteout) ++ found->whiteout = is_whiteout; ++ goto out; ++ } ++ ++ /* if 'name' isn't a whiteout, filldir it. */ ++ if (!is_whiteout) { ++ off_t pos = rdstate2offset(buf->rdstate); ++ u64 unionfs_ino = ino; ++ ++ err = buf->filldir(buf->dirent, name, namelen, pos, ++ unionfs_ino, d_type); ++ buf->rdstate->offset++; ++ verify_rdstate_offset(buf->rdstate); ++ } ++ /* ++ * If we did fill it, stuff it in our hash, otherwise return an ++ * error. ++ */ ++ if (err) { ++ buf->filldir_error = err; ++ goto out; ++ } ++ buf->entries_written++; ++ err = add_filldir_node(buf->rdstate, name, namelen, ++ buf->rdstate->bindex, is_whiteout); ++ if (err) ++ buf->filldir_error = err; ++ ++out: ++ return err; ++} ++ ++static int unionfs_readdir(struct file *file, void *dirent, filldir_t filldir) ++{ ++ int err = 0; ++ struct file *lower_file = NULL; ++ struct dentry *dentry = file->f_path.dentry; ++ struct dentry *parent; ++ struct inode *inode = NULL; ++ struct unionfs_getdents_callback buf; ++ struct unionfs_dir_state *uds; ++ int bend; ++ loff_t offset; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT); ++ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ err = unionfs_file_revalidate(file, parent, false); ++ if (unlikely(err)) ++ goto out; ++ ++ inode = dentry->d_inode; ++ ++ uds = UNIONFS_F(file)->rdstate; ++ if (!uds) { ++ if (file->f_pos == DIREOF) { ++ goto out; ++ } else if (file->f_pos > 0) { ++ uds = find_rdstate(inode, file->f_pos); ++ if (unlikely(!uds)) { ++ err = -ESTALE; ++ goto out; ++ } ++ UNIONFS_F(file)->rdstate = uds; ++ } else { ++ init_rdstate(file); ++ uds = UNIONFS_F(file)->rdstate; ++ } ++ } ++ bend = fbend(file); ++ ++ while (uds->bindex <= bend) { ++ lower_file = unionfs_lower_file_idx(file, uds->bindex); ++ if (!lower_file) { ++ uds->bindex++; ++ uds->dirpos = 0; ++ continue; ++ } ++ ++ /* prepare callback buffer */ ++ buf.filldir_called = 0; ++ buf.filldir_error = 0; ++ buf.entries_written = 0; ++ buf.dirent = dirent; ++ buf.filldir = filldir; ++ buf.rdstate = uds; ++ buf.sb = inode->i_sb; ++ ++ /* Read starting from where we last left off. */ ++ offset = vfs_llseek(lower_file, uds->dirpos, SEEK_SET); ++ if (offset < 0) { ++ err = offset; ++ goto out; ++ } ++ err = vfs_readdir(lower_file, unionfs_filldir, &buf); ++ ++ /* Save the position for when we continue. */ ++ offset = vfs_llseek(lower_file, 0, SEEK_CUR); ++ if (offset < 0) { ++ err = offset; ++ goto out; ++ } ++ uds->dirpos = offset; ++ ++ /* Copy the atime. */ ++ fsstack_copy_attr_atime(inode, ++ lower_file->f_path.dentry->d_inode); ++ ++ if (err < 0) ++ goto out; ++ ++ if (buf.filldir_error) ++ break; ++ ++ if (!buf.entries_written) { ++ uds->bindex++; ++ uds->dirpos = 0; ++ } ++ } ++ ++ if (!buf.filldir_error && uds->bindex >= bend) { ++ /* Save the number of hash entries for next time. */ ++ UNIONFS_I(inode)->hashsize = uds->hashentries; ++ free_rdstate(uds); ++ UNIONFS_F(file)->rdstate = NULL; ++ file->f_pos = DIREOF; ++ } else { ++ file->f_pos = rdstate2offset(uds); ++ } ++ ++out: ++ if (!err) ++ unionfs_check_file(file); ++ unionfs_unlock_dentry(dentry); ++ unionfs_unlock_parent(dentry, parent); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} ++ ++/* ++ * This is not meant to be a generic repositioning function. If you do ++ * things that aren't supported, then we return EINVAL. ++ * ++ * What is allowed: ++ * (1) seeking to the same position that you are currently at ++ * This really has no effect, but returns where you are. ++ * (2) seeking to the beginning of the file ++ * This throws out all state, and lets you begin again. ++ */ ++static loff_t unionfs_dir_llseek(struct file *file, loff_t offset, int origin) ++{ ++ struct unionfs_dir_state *rdstate; ++ struct dentry *dentry = file->f_path.dentry; ++ struct dentry *parent; ++ loff_t err; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT); ++ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ err = unionfs_file_revalidate(file, parent, false); ++ if (unlikely(err)) ++ goto out; ++ ++ rdstate = UNIONFS_F(file)->rdstate; ++ ++ /* ++ * we let users seek to their current position, but not anywhere ++ * else. ++ */ ++ if (!offset) { ++ switch (origin) { ++ case SEEK_SET: ++ if (rdstate) { ++ free_rdstate(rdstate); ++ UNIONFS_F(file)->rdstate = NULL; ++ } ++ init_rdstate(file); ++ err = 0; ++ break; ++ case SEEK_CUR: ++ err = file->f_pos; ++ break; ++ case SEEK_END: ++ /* Unsupported, because we would break everything. */ ++ err = -EINVAL; ++ break; ++ } ++ } else { ++ switch (origin) { ++ case SEEK_SET: ++ if (rdstate) { ++ if (offset == rdstate2offset(rdstate)) ++ err = offset; ++ else if (file->f_pos == DIREOF) ++ err = DIREOF; ++ else ++ err = -EINVAL; ++ } else { ++ struct inode *inode; ++ inode = dentry->d_inode; ++ rdstate = find_rdstate(inode, offset); ++ if (rdstate) { ++ UNIONFS_F(file)->rdstate = rdstate; ++ err = rdstate->offset; ++ } else { ++ err = -EINVAL; ++ } ++ } ++ break; ++ case SEEK_CUR: ++ case SEEK_END: ++ /* Unsupported, because we would break everything. */ ++ err = -EINVAL; ++ break; ++ } ++ } ++ ++out: ++ if (!err) ++ unionfs_check_file(file); ++ unionfs_unlock_dentry(dentry); ++ unionfs_unlock_parent(dentry, parent); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} ++ ++/* ++ * Trimmed directory options, we shouldn't pass everything down since ++ * we don't want to operate on partial directories. ++ */ ++struct file_operations unionfs_dir_fops = { ++ .llseek = unionfs_dir_llseek, ++ .read = generic_read_dir, ++ .readdir = unionfs_readdir, ++ .unlocked_ioctl = unionfs_ioctl, ++ .open = unionfs_open, ++ .release = unionfs_file_release, ++ .flush = unionfs_flush, ++ .fsync = unionfs_fsync, ++ .fasync = unionfs_fasync, ++}; +diff --git a/fs/unionfs/dirhelper.c b/fs/unionfs/dirhelper.c +new file mode 100644 +index 0000000..033343b +--- /dev/null ++++ b/fs/unionfs/dirhelper.c +@@ -0,0 +1,158 @@ ++/* ++ * Copyright (c) 2003-2010 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2010 Stony Brook University ++ * Copyright (c) 2003-2010 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++ ++#define RD_NONE 0 ++#define RD_CHECK_EMPTY 1 ++/* The callback structure for check_empty. */ ++struct unionfs_rdutil_callback { ++ int err; ++ int filldir_called; ++ struct unionfs_dir_state *rdstate; ++ int mode; ++}; ++ ++/* This filldir function makes sure only whiteouts exist within a directory. */ ++static int readdir_util_callback(void *dirent, const char *oname, int namelen, ++ loff_t offset, u64 ino, unsigned int d_type) ++{ ++ int err = 0; ++ struct unionfs_rdutil_callback *buf = dirent; ++ int is_whiteout; ++ struct filldir_node *found; ++ char *name = (char *) oname; ++ ++ buf->filldir_called = 1; ++ ++ if (name[0] == '.' && (namelen == 1 || ++ (name[1] == '.' && namelen == 2))) ++ goto out; ++ ++ is_whiteout = is_whiteout_name(&name, &namelen); ++ ++ found = find_filldir_node(buf->rdstate, name, namelen, is_whiteout); ++ /* If it was found in the table there was a previous whiteout. */ ++ if (found) ++ goto out; ++ ++ /* ++ * if it wasn't found and isn't a whiteout, the directory isn't ++ * empty. ++ */ ++ err = -ENOTEMPTY; ++ if ((buf->mode == RD_CHECK_EMPTY) && !is_whiteout) ++ goto out; ++ ++ err = add_filldir_node(buf->rdstate, name, namelen, ++ buf->rdstate->bindex, is_whiteout); ++ ++out: ++ buf->err = err; ++ return err; ++} ++ ++/* Is a directory logically empty? */ ++int check_empty(struct dentry *dentry, struct dentry *parent, ++ struct unionfs_dir_state **namelist) ++{ ++ int err = 0; ++ struct dentry *lower_dentry = NULL; ++ struct vfsmount *mnt; ++ struct super_block *sb; ++ struct file *lower_file; ++ struct unionfs_rdutil_callback *buf = NULL; ++ int bindex, bstart, bend, bopaque; ++ ++ sb = dentry->d_sb; ++ ++ ++ BUG_ON(!S_ISDIR(dentry->d_inode->i_mode)); ++ ++ err = unionfs_partial_lookup(dentry, parent); ++ if (err) ++ goto out; ++ ++ bstart = dbstart(dentry); ++ bend = dbend(dentry); ++ bopaque = dbopaque(dentry); ++ if (0 <= bopaque && bopaque < bend) ++ bend = bopaque; ++ ++ buf = kmalloc(sizeof(struct unionfs_rdutil_callback), GFP_KERNEL); ++ if (unlikely(!buf)) { ++ err = -ENOMEM; ++ goto out; ++ } ++ buf->err = 0; ++ buf->mode = RD_CHECK_EMPTY; ++ buf->rdstate = alloc_rdstate(dentry->d_inode, bstart); ++ if (unlikely(!buf->rdstate)) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ /* Process the lower directories with rdutil_callback as a filldir. */ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ if (!lower_dentry) ++ continue; ++ if (!lower_dentry->d_inode) ++ continue; ++ if (!S_ISDIR(lower_dentry->d_inode->i_mode)) ++ continue; ++ ++ dget(lower_dentry); ++ mnt = unionfs_mntget(dentry, bindex); ++ branchget(sb, bindex); ++ lower_file = dentry_open(lower_dentry, mnt, O_RDONLY, current_cred()); ++ if (IS_ERR(lower_file)) { ++ err = PTR_ERR(lower_file); ++ branchput(sb, bindex); ++ goto out; ++ } ++ ++ do { ++ buf->filldir_called = 0; ++ buf->rdstate->bindex = bindex; ++ err = vfs_readdir(lower_file, ++ readdir_util_callback, buf); ++ if (buf->err) ++ err = buf->err; ++ } while ((err >= 0) && buf->filldir_called); ++ ++ /* fput calls dput for lower_dentry */ ++ fput(lower_file); ++ branchput(sb, bindex); ++ ++ if (err < 0) ++ goto out; ++ } ++ ++out: ++ if (buf) { ++ if (namelist && !err) ++ *namelist = buf->rdstate; ++ else if (buf->rdstate) ++ free_rdstate(buf->rdstate); ++ kfree(buf); ++ } ++ ++ ++ return err; ++} +diff --git a/fs/unionfs/fanout.h b/fs/unionfs/fanout.h +new file mode 100644 +index 0000000..5b77eac +--- /dev/null ++++ b/fs/unionfs/fanout.h +@@ -0,0 +1,407 @@ ++/* ++ * Copyright (c) 2003-2010 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2010 Stony Brook University ++ * Copyright (c) 2003-2010 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#ifndef _FANOUT_H_ ++#define _FANOUT_H_ ++ ++/* ++ * Inode to private data ++ * ++ * Since we use containers and the struct inode is _inside_ the ++ * unionfs_inode_info structure, UNIONFS_I will always (given a non-NULL ++ * inode pointer), return a valid non-NULL pointer. ++ */ ++static inline struct unionfs_inode_info *UNIONFS_I(const struct inode *inode) ++{ ++ return container_of(inode, struct unionfs_inode_info, vfs_inode); ++} ++ ++#define ibstart(ino) (UNIONFS_I(ino)->bstart) ++#define ibend(ino) (UNIONFS_I(ino)->bend) ++ ++/* Dentry to private data */ ++#define UNIONFS_D(dent) ((struct unionfs_dentry_info *)(dent)->d_fsdata) ++#define dbstart(dent) (UNIONFS_D(dent)->bstart) ++#define dbend(dent) (UNIONFS_D(dent)->bend) ++#define dbopaque(dent) (UNIONFS_D(dent)->bopaque) ++ ++/* Superblock to private data */ ++#define UNIONFS_SB(super) ((struct unionfs_sb_info *)(super)->s_fs_info) ++#define sbstart(sb) 0 ++#define sbend(sb) (UNIONFS_SB(sb)->bend) ++#define sbmax(sb) (UNIONFS_SB(sb)->bend + 1) ++#define sbhbid(sb) (UNIONFS_SB(sb)->high_branch_id) ++ ++/* File to private Data */ ++#define UNIONFS_F(file) ((struct unionfs_file_info *)((file)->private_data)) ++#define fbstart(file) (UNIONFS_F(file)->bstart) ++#define fbend(file) (UNIONFS_F(file)->bend) ++ ++/* macros to manipulate branch IDs in stored in our superblock */ ++static inline int branch_id(struct super_block *sb, int index) ++{ ++ BUG_ON(!sb || index < 0); ++ return UNIONFS_SB(sb)->data[index].branch_id; ++} ++ ++static inline void set_branch_id(struct super_block *sb, int index, int val) ++{ ++ BUG_ON(!sb || index < 0); ++ UNIONFS_SB(sb)->data[index].branch_id = val; ++} ++ ++static inline void new_branch_id(struct super_block *sb, int index) ++{ ++ BUG_ON(!sb || index < 0); ++ set_branch_id(sb, index, ++UNIONFS_SB(sb)->high_branch_id); ++} ++ ++/* ++ * Find new index of matching branch with an existing superblock of a known ++ * (possibly old) id. This is needed because branches could have been ++ * added/deleted causing the branches of any open files to shift. ++ * ++ * @sb: the new superblock which may have new/different branch IDs ++ * @id: the old/existing id we're looking for ++ * Returns index of newly found branch (0 or greater), -1 otherwise. ++ */ ++static inline int branch_id_to_idx(struct super_block *sb, int id) ++{ ++ int i; ++ for (i = 0; i < sbmax(sb); i++) { ++ if (branch_id(sb, i) == id) ++ return i; ++ } ++ /* in the non-ODF code, this should really never happen */ ++ printk(KERN_WARNING "unionfs: cannot find branch with id %d\n", id); ++ return -1; ++} ++ ++/* File to lower file. */ ++static inline struct file *unionfs_lower_file(const struct file *f) ++{ ++ BUG_ON(!f); ++ return UNIONFS_F(f)->lower_files[fbstart(f)]; ++} ++ ++static inline struct file *unionfs_lower_file_idx(const struct file *f, ++ int index) ++{ ++ BUG_ON(!f || index < 0); ++ return UNIONFS_F(f)->lower_files[index]; ++} ++ ++static inline void unionfs_set_lower_file_idx(struct file *f, int index, ++ struct file *val) ++{ ++ BUG_ON(!f || index < 0); ++ UNIONFS_F(f)->lower_files[index] = val; ++ /* save branch ID (may be redundant?) */ ++ UNIONFS_F(f)->saved_branch_ids[index] = ++ branch_id((f)->f_path.dentry->d_sb, index); ++} ++ ++static inline void unionfs_set_lower_file(struct file *f, struct file *val) ++{ ++ BUG_ON(!f); ++ unionfs_set_lower_file_idx((f), fbstart(f), (val)); ++} ++ ++/* Inode to lower inode. */ ++static inline struct inode *unionfs_lower_inode(const struct inode *i) ++{ ++ BUG_ON(!i); ++ return UNIONFS_I(i)->lower_inodes[ibstart(i)]; ++} ++ ++static inline struct inode *unionfs_lower_inode_idx(const struct inode *i, ++ int index) ++{ ++ BUG_ON(!i || index < 0); ++ return UNIONFS_I(i)->lower_inodes[index]; ++} ++ ++static inline void unionfs_set_lower_inode_idx(struct inode *i, int index, ++ struct inode *val) ++{ ++ BUG_ON(!i || index < 0); ++ UNIONFS_I(i)->lower_inodes[index] = val; ++} ++ ++static inline void unionfs_set_lower_inode(struct inode *i, struct inode *val) ++{ ++ BUG_ON(!i); ++ UNIONFS_I(i)->lower_inodes[ibstart(i)] = val; ++} ++ ++/* Superblock to lower superblock. */ ++static inline struct super_block *unionfs_lower_super( ++ const struct super_block *sb) ++{ ++ BUG_ON(!sb); ++ return UNIONFS_SB(sb)->data[sbstart(sb)].sb; ++} ++ ++static inline struct super_block *unionfs_lower_super_idx( ++ const struct super_block *sb, ++ int index) ++{ ++ BUG_ON(!sb || index < 0); ++ return UNIONFS_SB(sb)->data[index].sb; ++} ++ ++static inline void unionfs_set_lower_super_idx(struct super_block *sb, ++ int index, ++ struct super_block *val) ++{ ++ BUG_ON(!sb || index < 0); ++ UNIONFS_SB(sb)->data[index].sb = val; ++} ++ ++static inline void unionfs_set_lower_super(struct super_block *sb, ++ struct super_block *val) ++{ ++ BUG_ON(!sb); ++ UNIONFS_SB(sb)->data[sbstart(sb)].sb = val; ++} ++ ++/* Branch count macros. */ ++static inline int branch_count(const struct super_block *sb, int index) ++{ ++ BUG_ON(!sb || index < 0); ++ return atomic_read(&UNIONFS_SB(sb)->data[index].open_files); ++} ++ ++static inline void set_branch_count(struct super_block *sb, int index, int val) ++{ ++ BUG_ON(!sb || index < 0); ++ atomic_set(&UNIONFS_SB(sb)->data[index].open_files, val); ++} ++ ++static inline void branchget(struct super_block *sb, int index) ++{ ++ BUG_ON(!sb || index < 0); ++ atomic_inc(&UNIONFS_SB(sb)->data[index].open_files); ++} ++ ++static inline void branchput(struct super_block *sb, int index) ++{ ++ BUG_ON(!sb || index < 0); ++ atomic_dec(&UNIONFS_SB(sb)->data[index].open_files); ++} ++ ++/* Dentry macros */ ++static inline void unionfs_set_lower_dentry_idx(struct dentry *dent, int index, ++ struct dentry *val) ++{ ++ BUG_ON(!dent || index < 0); ++ UNIONFS_D(dent)->lower_paths[index].dentry = val; ++} ++ ++static inline struct dentry *unionfs_lower_dentry_idx( ++ const struct dentry *dent, ++ int index) ++{ ++ BUG_ON(!dent || index < 0); ++ return UNIONFS_D(dent)->lower_paths[index].dentry; ++} ++ ++static inline struct dentry *unionfs_lower_dentry(const struct dentry *dent) ++{ ++ BUG_ON(!dent); ++ return unionfs_lower_dentry_idx(dent, dbstart(dent)); ++} ++ ++static inline void unionfs_set_lower_mnt_idx(struct dentry *dent, int index, ++ struct vfsmount *mnt) ++{ ++ BUG_ON(!dent || index < 0); ++ UNIONFS_D(dent)->lower_paths[index].mnt = mnt; ++} ++ ++static inline struct vfsmount *unionfs_lower_mnt_idx( ++ const struct dentry *dent, ++ int index) ++{ ++ BUG_ON(!dent || index < 0); ++ return UNIONFS_D(dent)->lower_paths[index].mnt; ++} ++ ++static inline struct vfsmount *unionfs_lower_mnt(const struct dentry *dent) ++{ ++ BUG_ON(!dent); ++ return unionfs_lower_mnt_idx(dent, dbstart(dent)); ++} ++ ++/* Macros for locking a dentry. */ ++enum unionfs_dentry_lock_class { ++ UNIONFS_DMUTEX_NORMAL, ++ UNIONFS_DMUTEX_ROOT, ++ UNIONFS_DMUTEX_PARENT, ++ UNIONFS_DMUTEX_CHILD, ++ UNIONFS_DMUTEX_WHITEOUT, ++ UNIONFS_DMUTEX_REVAL_PARENT, /* for file/dentry revalidate */ ++ UNIONFS_DMUTEX_REVAL_CHILD, /* for file/dentry revalidate */ ++}; ++ ++static inline void unionfs_lock_dentry(struct dentry *d, ++ unsigned int subclass) ++{ ++ BUG_ON(!d); ++ mutex_lock_nested(&UNIONFS_D(d)->lock, subclass); ++} ++ ++static inline void unionfs_unlock_dentry(struct dentry *d) ++{ ++ BUG_ON(!d); ++ mutex_unlock(&UNIONFS_D(d)->lock); ++} ++ ++static inline struct dentry *unionfs_lock_parent(struct dentry *d, ++ unsigned int subclass) ++{ ++ struct dentry *p; ++ ++ BUG_ON(!d); ++ p = dget_parent(d); ++ if (p != d) ++ mutex_lock_nested(&UNIONFS_D(p)->lock, subclass); ++ return p; ++} ++ ++static inline void unionfs_unlock_parent(struct dentry *d, struct dentry *p) ++{ ++ BUG_ON(!d); ++ BUG_ON(!p); ++ if (p != d) { ++ BUG_ON(!mutex_is_locked(&UNIONFS_D(p)->lock)); ++ mutex_unlock(&UNIONFS_D(p)->lock); ++ } ++ dput(p); ++} ++ ++static inline void verify_locked(struct dentry *d) ++{ ++ BUG_ON(!d); ++ BUG_ON(!mutex_is_locked(&UNIONFS_D(d)->lock)); ++} ++ ++/* macros to put lower objects */ ++ ++/* ++ * iput lower inodes of an unionfs dentry, from bstart to bend. If ++ * @free_lower is true, then also kfree the memory used to hold the lower ++ * object pointers. ++ */ ++static inline void iput_lowers(struct inode *inode, ++ int bstart, int bend, bool free_lower) ++{ ++ struct inode *lower_inode; ++ int bindex; ++ ++ BUG_ON(!inode); ++ BUG_ON(!UNIONFS_I(inode)); ++ BUG_ON(bstart < 0); ++ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ lower_inode = unionfs_lower_inode_idx(inode, bindex); ++ if (lower_inode) { ++ unionfs_set_lower_inode_idx(inode, bindex, NULL); ++ /* see Documentation/filesystems/unionfs/issues.txt */ ++ lockdep_off(); ++ iput(lower_inode); ++ lockdep_on(); ++ } ++ } ++ ++ if (free_lower) { ++ kfree(UNIONFS_I(inode)->lower_inodes); ++ UNIONFS_I(inode)->lower_inodes = NULL; ++ } ++} ++ ++/* iput all lower inodes, and reset start/end branch indices to -1 */ ++static inline void iput_lowers_all(struct inode *inode, bool free_lower) ++{ ++ int bstart, bend; ++ ++ BUG_ON(!inode); ++ BUG_ON(!UNIONFS_I(inode)); ++ bstart = ibstart(inode); ++ bend = ibend(inode); ++ BUG_ON(bstart < 0); ++ ++ iput_lowers(inode, bstart, bend, free_lower); ++ ibstart(inode) = ibend(inode) = -1; ++} ++ ++/* ++ * dput/mntput all lower dentries and vfsmounts of an unionfs dentry, from ++ * bstart to bend. If @free_lower is true, then also kfree the memory used ++ * to hold the lower object pointers. ++ * ++ * XXX: implement using path_put VFS macros ++ */ ++static inline void path_put_lowers(struct dentry *dentry, ++ int bstart, int bend, bool free_lower) ++{ ++ struct dentry *lower_dentry; ++ struct vfsmount *lower_mnt; ++ int bindex; ++ ++ BUG_ON(!dentry); ++ BUG_ON(!UNIONFS_D(dentry)); ++ BUG_ON(bstart < 0); ++ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ if (lower_dentry) { ++ unionfs_set_lower_dentry_idx(dentry, bindex, NULL); ++ dput(lower_dentry); ++ } ++ lower_mnt = unionfs_lower_mnt_idx(dentry, bindex); ++ if (lower_mnt) { ++ unionfs_set_lower_mnt_idx(dentry, bindex, NULL); ++ mntput(lower_mnt); ++ } ++ } ++ ++ if (free_lower) { ++ kfree(UNIONFS_D(dentry)->lower_paths); ++ UNIONFS_D(dentry)->lower_paths = NULL; ++ } ++} ++ ++/* ++ * dput/mntput all lower dentries and vfsmounts, and reset start/end branch ++ * indices to -1. ++ */ ++static inline void path_put_lowers_all(struct dentry *dentry, bool free_lower) ++{ ++ int bstart, bend; ++ ++ BUG_ON(!dentry); ++ BUG_ON(!UNIONFS_D(dentry)); ++ bstart = dbstart(dentry); ++ bend = dbend(dentry); ++ BUG_ON(bstart < 0); ++ ++ path_put_lowers(dentry, bstart, bend, free_lower); ++ dbstart(dentry) = dbend(dentry) = -1; ++} ++ ++#endif /* not _FANOUT_H */ +diff --git a/fs/unionfs/file.c b/fs/unionfs/file.c +new file mode 100644 +index 0000000..1c694c3 +--- /dev/null ++++ b/fs/unionfs/file.c +@@ -0,0 +1,382 @@ ++/* ++ * Copyright (c) 2003-2010 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2010 Stony Brook University ++ * Copyright (c) 2003-2010 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++ ++static ssize_t unionfs_read(struct file *file, char __user *buf, ++ size_t count, loff_t *ppos) ++{ ++ int err; ++ struct file *lower_file; ++ struct dentry *dentry = file->f_path.dentry; ++ struct dentry *parent; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT); ++ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ err = unionfs_file_revalidate(file, parent, false); ++ if (unlikely(err)) ++ goto out; ++ ++ lower_file = unionfs_lower_file(file); ++ err = vfs_read(lower_file, buf, count, ppos); ++ /* update our inode atime upon a successful lower read */ ++ if (err >= 0) { ++ fsstack_copy_attr_atime(dentry->d_inode, ++ lower_file->f_path.dentry->d_inode); ++ unionfs_check_file(file); ++ } ++ ++out: ++ unionfs_unlock_dentry(dentry); ++ unionfs_unlock_parent(dentry, parent); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} ++ ++static ssize_t unionfs_write(struct file *file, const char __user *buf, ++ size_t count, loff_t *ppos) ++{ ++ int err = 0; ++ struct file *lower_file; ++ struct dentry *dentry = file->f_path.dentry; ++ struct dentry *parent; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT); ++ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ err = unionfs_file_revalidate(file, parent, true); ++ if (unlikely(err)) ++ goto out; ++ ++ lower_file = unionfs_lower_file(file); ++ err = vfs_write(lower_file, buf, count, ppos); ++ /* update our inode times+sizes upon a successful lower write */ ++ if (err >= 0) { ++ fsstack_copy_inode_size(dentry->d_inode, ++ lower_file->f_path.dentry->d_inode); ++ fsstack_copy_attr_times(dentry->d_inode, ++ lower_file->f_path.dentry->d_inode); ++ UNIONFS_F(file)->wrote_to_file = true; /* for delayed copyup */ ++ unionfs_check_file(file); ++ } ++ ++out: ++ unionfs_unlock_dentry(dentry); ++ unionfs_unlock_parent(dentry, parent); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} ++ ++static int unionfs_file_readdir(struct file *file, void *dirent, ++ filldir_t filldir) ++{ ++ return -ENOTDIR; ++} ++ ++static int unionfs_mmap(struct file *file, struct vm_area_struct *vma) ++{ ++ int err = 0; ++ bool willwrite; ++ struct file *lower_file; ++ struct dentry *dentry = file->f_path.dentry; ++ struct dentry *parent; ++ const struct vm_operations_struct *saved_vm_ops = NULL; ++ ++ /* ++ * Since mm/memory.c:might_fault() (under PROVE_LOCKING) was ++ * modified in 2.6.29-rc1 to call might_lock_read on mmap_sem, this ++ * has been causing false positives in file system stacking layers. ++ * In particular, our ->mmap is called after sys_mmap2 already holds ++ * mmap_sem, then we lock our own mutexes; but earlier, it's ++ * possible for lockdep to have locked our mutexes first, and then ++ * we call a lower ->readdir which could call might_fault. The ++ * different ordering of the locks is what lockdep complains about ++ * -- unnecessarily. Therefore, we have no choice but to tell ++ * lockdep to temporarily turn off lockdep here. Note: the comments ++ * inside might_sleep also suggest that it would have been ++ * nicer to only annotate paths that needs that might_lock_read. ++ */ ++ lockdep_off(); ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT); ++ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ /* This might be deferred to mmap's writepage */ ++ willwrite = ((vma->vm_flags | VM_SHARED | VM_WRITE) == vma->vm_flags); ++ err = unionfs_file_revalidate(file, parent, willwrite); ++ if (unlikely(err)) ++ goto out; ++ unionfs_check_file(file); ++ ++ /* ++ * File systems which do not implement ->writepage may use ++ * generic_file_readonly_mmap as their ->mmap op. If you call ++ * generic_file_readonly_mmap with VM_WRITE, you'd get an -EINVAL. ++ * But we cannot call the lower ->mmap op, so we can't tell that ++ * writeable mappings won't work. Therefore, our only choice is to ++ * check if the lower file system supports the ->writepage, and if ++ * not, return EINVAL (the same error that ++ * generic_file_readonly_mmap returns in that case). ++ */ ++ lower_file = unionfs_lower_file(file); ++ if (willwrite && !lower_file->f_mapping->a_ops->writepage) { ++ err = -EINVAL; ++ printk(KERN_ERR "unionfs: branch %d file system does not " ++ "support writeable mmap\n", fbstart(file)); ++ goto out; ++ } ++ ++ /* ++ * find and save lower vm_ops. ++ * ++ * XXX: the VFS should have a cleaner way of finding the lower vm_ops ++ */ ++ if (!UNIONFS_F(file)->lower_vm_ops) { ++ err = lower_file->f_op->mmap(lower_file, vma); ++ if (err) { ++ printk(KERN_ERR "unionfs: lower mmap failed %d\n", err); ++ goto out; ++ } ++ saved_vm_ops = vma->vm_ops; ++ err = do_munmap(current->mm, vma->vm_start, ++ vma->vm_end - vma->vm_start); ++ if (err) { ++ printk(KERN_ERR "unionfs: do_munmap failed %d\n", err); ++ goto out; ++ } ++ } ++ ++ file->f_mapping->a_ops = &unionfs_dummy_aops; ++ err = generic_file_mmap(file, vma); ++ file->f_mapping->a_ops = &unionfs_aops; ++ if (err) { ++ printk(KERN_ERR "unionfs: generic_file_mmap failed %d\n", err); ++ goto out; ++ } ++ vma->vm_ops = &unionfs_vm_ops; ++ if (!UNIONFS_F(file)->lower_vm_ops) ++ UNIONFS_F(file)->lower_vm_ops = saved_vm_ops; ++ ++out: ++ if (!err) { ++ /* copyup could cause parent dir times to change */ ++ unionfs_copy_attr_times(parent->d_inode); ++ unionfs_check_file(file); ++ } ++ unionfs_unlock_dentry(dentry); ++ unionfs_unlock_parent(dentry, parent); ++ unionfs_read_unlock(dentry->d_sb); ++ lockdep_on(); ++ return err; ++} ++ ++int unionfs_fsync(struct file *file, int datasync) ++{ ++ int bindex, bstart, bend; ++ struct file *lower_file; ++ struct dentry *dentry = file->f_path.dentry; ++ struct dentry *lower_dentry; ++ struct dentry *parent; ++ struct inode *lower_inode, *inode; ++ int err = -EINVAL; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT); ++ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ err = unionfs_file_revalidate(file, parent, true); ++ if (unlikely(err)) ++ goto out; ++ unionfs_check_file(file); ++ ++ bstart = fbstart(file); ++ bend = fbend(file); ++ if (bstart < 0 || bend < 0) ++ goto out; ++ ++ inode = dentry->d_inode; ++ if (unlikely(!inode)) { ++ printk(KERN_ERR ++ "unionfs: null lower inode in unionfs_fsync\n"); ++ goto out; ++ } ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ lower_inode = unionfs_lower_inode_idx(inode, bindex); ++ if (!lower_inode || !lower_inode->i_fop->fsync) ++ continue; ++ lower_file = unionfs_lower_file_idx(file, bindex); ++ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ mutex_lock(&lower_inode->i_mutex); ++ err = lower_inode->i_fop->fsync(lower_file, datasync); ++ if (!err && bindex == bstart) ++ fsstack_copy_attr_times(inode, lower_inode); ++ mutex_unlock(&lower_inode->i_mutex); ++ if (err) ++ goto out; ++ } ++ ++out: ++ if (!err) ++ unionfs_check_file(file); ++ unionfs_unlock_dentry(dentry); ++ unionfs_unlock_parent(dentry, parent); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} ++ ++int unionfs_fasync(int fd, struct file *file, int flag) ++{ ++ int bindex, bstart, bend; ++ struct file *lower_file; ++ struct dentry *dentry = file->f_path.dentry; ++ struct dentry *parent; ++ struct inode *lower_inode, *inode; ++ int err = 0; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT); ++ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ err = unionfs_file_revalidate(file, parent, true); ++ if (unlikely(err)) ++ goto out; ++ unionfs_check_file(file); ++ ++ bstart = fbstart(file); ++ bend = fbend(file); ++ if (bstart < 0 || bend < 0) ++ goto out; ++ ++ inode = dentry->d_inode; ++ if (unlikely(!inode)) { ++ printk(KERN_ERR ++ "unionfs: null lower inode in unionfs_fasync\n"); ++ goto out; ++ } ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ lower_inode = unionfs_lower_inode_idx(inode, bindex); ++ if (!lower_inode || !lower_inode->i_fop->fasync) ++ continue; ++ lower_file = unionfs_lower_file_idx(file, bindex); ++ mutex_lock(&lower_inode->i_mutex); ++ err = lower_inode->i_fop->fasync(fd, lower_file, flag); ++ if (!err && bindex == bstart) ++ fsstack_copy_attr_times(inode, lower_inode); ++ mutex_unlock(&lower_inode->i_mutex); ++ if (err) ++ goto out; ++ } ++ ++out: ++ if (!err) ++ unionfs_check_file(file); ++ unionfs_unlock_dentry(dentry); ++ unionfs_unlock_parent(dentry, parent); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} ++ ++static ssize_t unionfs_splice_read(struct file *file, loff_t *ppos, ++ struct pipe_inode_info *pipe, size_t len, ++ unsigned int flags) ++{ ++ ssize_t err; ++ struct file *lower_file; ++ struct dentry *dentry = file->f_path.dentry; ++ struct dentry *parent; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT); ++ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ err = unionfs_file_revalidate(file, parent, false); ++ if (unlikely(err)) ++ goto out; ++ ++ lower_file = unionfs_lower_file(file); ++ err = vfs_splice_to(lower_file, ppos, pipe, len, flags); ++ /* update our inode atime upon a successful lower splice-read */ ++ if (err >= 0) { ++ fsstack_copy_attr_atime(dentry->d_inode, ++ lower_file->f_path.dentry->d_inode); ++ unionfs_check_file(file); ++ } ++ ++out: ++ unionfs_unlock_dentry(dentry); ++ unionfs_unlock_parent(dentry, parent); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} ++ ++static ssize_t unionfs_splice_write(struct pipe_inode_info *pipe, ++ struct file *file, loff_t *ppos, ++ size_t len, unsigned int flags) ++{ ++ ssize_t err = 0; ++ struct file *lower_file; ++ struct dentry *dentry = file->f_path.dentry; ++ struct dentry *parent; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT); ++ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ err = unionfs_file_revalidate(file, parent, true); ++ if (unlikely(err)) ++ goto out; ++ ++ lower_file = unionfs_lower_file(file); ++ err = vfs_splice_from(pipe, lower_file, ppos, len, flags); ++ /* update our inode times+sizes upon a successful lower write */ ++ if (err >= 0) { ++ fsstack_copy_inode_size(dentry->d_inode, ++ lower_file->f_path.dentry->d_inode); ++ fsstack_copy_attr_times(dentry->d_inode, ++ lower_file->f_path.dentry->d_inode); ++ unionfs_check_file(file); ++ } ++ ++out: ++ unionfs_unlock_dentry(dentry); ++ unionfs_unlock_parent(dentry, parent); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} ++ ++struct file_operations unionfs_main_fops = { ++ .llseek = generic_file_llseek, ++ .read = unionfs_read, ++ .write = unionfs_write, ++ .readdir = unionfs_file_readdir, ++ .unlocked_ioctl = unionfs_ioctl, ++#ifdef CONFIG_COMPAT ++ .compat_ioctl = unionfs_ioctl, ++#endif ++ .mmap = unionfs_mmap, ++ .open = unionfs_open, ++ .flush = unionfs_flush, ++ .release = unionfs_file_release, ++ .fsync = unionfs_fsync, ++ .fasync = unionfs_fasync, ++ .splice_read = unionfs_splice_read, ++ .splice_write = unionfs_splice_write, ++}; +diff --git a/fs/unionfs/inode.c b/fs/unionfs/inode.c +new file mode 100644 +index 0000000..4c36f16 +--- /dev/null ++++ b/fs/unionfs/inode.c +@@ -0,0 +1,1061 @@ ++/* ++ * Copyright (c) 2003-2010 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2010 Stony Brook University ++ * Copyright (c) 2003-2010 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++ ++/* ++ * Find a writeable branch to create new object in. Checks all writeble ++ * branches of the parent inode, from istart to iend order; if none are ++ * suitable, also tries branch 0 (which may require a copyup). ++ * ++ * Return a lower_dentry we can use to create object in, or ERR_PTR. ++ */ ++static struct dentry *find_writeable_branch(struct inode *parent, ++ struct dentry *dentry) ++{ ++ int err = -EINVAL; ++ int bindex, istart, iend; ++ struct dentry *lower_dentry = NULL; ++ ++ istart = ibstart(parent); ++ iend = ibend(parent); ++ if (istart < 0) ++ goto out; ++ ++begin: ++ for (bindex = istart; bindex <= iend; bindex++) { ++ /* skip non-writeable branches */ ++ err = is_robranch_super(dentry->d_sb, bindex); ++ if (err) { ++ err = -EROFS; ++ continue; ++ } ++ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ if (!lower_dentry) ++ continue; ++ /* ++ * check for whiteouts in writeable branch, and remove them ++ * if necessary. ++ */ ++ err = check_unlink_whiteout(dentry, lower_dentry, bindex); ++ if (err > 0) /* ignore if whiteout found and removed */ ++ err = 0; ++ if (err) ++ continue; ++ /* if get here, we can write to the branch */ ++ break; ++ } ++ /* ++ * If istart wasn't already branch 0, and we got any error, then try ++ * branch 0 (which may require copyup) ++ */ ++ if (err && istart > 0) { ++ istart = iend = 0; ++ goto begin; ++ } ++ ++ /* ++ * If we tried even branch 0, and still got an error, abort. But if ++ * the error was an EROFS, then we should try to copyup. ++ */ ++ if (err && err != -EROFS) ++ goto out; ++ ++ /* ++ * If we get here, then check if copyup needed. If lower_dentry is ++ * NULL, create the entire dentry directory structure in branch 0. ++ */ ++ if (!lower_dentry) { ++ bindex = 0; ++ lower_dentry = create_parents(parent, dentry, ++ dentry->d_name.name, bindex); ++ if (IS_ERR(lower_dentry)) { ++ err = PTR_ERR(lower_dentry); ++ goto out; ++ } ++ } ++ err = 0; /* all's well */ ++out: ++ if (err) ++ return ERR_PTR(err); ++ return lower_dentry; ++} ++ ++static int unionfs_create(struct inode *dir, struct dentry *dentry, ++ int mode, struct nameidata *nd_unused) ++{ ++ int err = 0; ++ struct dentry *lower_dentry = NULL; ++ struct dentry *lower_parent_dentry = NULL; ++ struct dentry *parent; ++ int valid = 0; ++ struct nameidata lower_nd; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD); ++ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ valid = __unionfs_d_revalidate(dentry, parent, false); ++ if (unlikely(!valid)) { ++ err = -ESTALE; /* same as what real_lookup does */ ++ goto out; ++ } ++ ++ lower_dentry = find_writeable_branch(dir, dentry); ++ if (IS_ERR(lower_dentry)) { ++ err = PTR_ERR(lower_dentry); ++ goto out; ++ } ++ ++ lower_parent_dentry = lock_parent(lower_dentry); ++ if (IS_ERR(lower_parent_dentry)) { ++ err = PTR_ERR(lower_parent_dentry); ++ goto out_unlock; ++ } ++ ++ err = init_lower_nd(&lower_nd, LOOKUP_CREATE); ++ if (unlikely(err < 0)) ++ goto out_unlock; ++ err = vfs_create(lower_parent_dentry->d_inode, lower_dentry, mode, ++ &lower_nd); ++ release_lower_nd(&lower_nd, err); ++ ++ if (!err) { ++ err = PTR_ERR(unionfs_interpose(dentry, dir->i_sb, 0)); ++ if (!err) { ++ unionfs_copy_attr_times(dir); ++ fsstack_copy_inode_size(dir, ++ lower_parent_dentry->d_inode); ++ /* update no. of links on parent directory */ ++ dir->i_nlink = unionfs_get_nlinks(dir); ++ } ++ } ++ ++out_unlock: ++ unlock_dir(lower_parent_dentry); ++out: ++ if (!err) { ++ unionfs_postcopyup_setmnt(dentry); ++ unionfs_check_inode(dir); ++ unionfs_check_dentry(dentry); ++ } ++ unionfs_unlock_dentry(dentry); ++ unionfs_unlock_parent(dentry, parent); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} ++ ++/* ++ * unionfs_lookup is the only special function which takes a dentry, yet we ++ * do NOT want to call __unionfs_d_revalidate_chain because by definition, ++ * we don't have a valid dentry here yet. ++ */ ++static struct dentry *unionfs_lookup(struct inode *dir, ++ struct dentry *dentry, ++ struct nameidata *nd_unused) ++{ ++ struct dentry *ret, *parent; ++ int err = 0; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD); ++ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT); ++ ++ /* ++ * As long as we lock/dget the parent, then can skip validating the ++ * parent now; we may have to rebuild this dentry on the next ++ * ->d_revalidate, however. ++ */ ++ ++ /* allocate dentry private data. We free it in ->d_release */ ++ err = new_dentry_private_data(dentry, UNIONFS_DMUTEX_CHILD); ++ if (unlikely(err)) { ++ ret = ERR_PTR(err); ++ goto out; ++ } ++ ++ ret = unionfs_lookup_full(dentry, parent, INTERPOSE_LOOKUP); ++ ++ if (!IS_ERR(ret)) { ++ if (ret) ++ dentry = ret; ++ /* lookup_full can return multiple positive dentries */ ++ if (dentry->d_inode && !S_ISDIR(dentry->d_inode->i_mode)) { ++ BUG_ON(dbstart(dentry) < 0); ++ unionfs_postcopyup_release(dentry); ++ } ++ unionfs_copy_attr_times(dentry->d_inode); ++ } ++ ++ unionfs_check_inode(dir); ++ if (!IS_ERR(ret)) ++ unionfs_check_dentry(dentry); ++ unionfs_check_dentry(parent); ++ unionfs_unlock_dentry(dentry); /* locked in new_dentry_private data */ ++ ++out: ++ unionfs_unlock_parent(dentry, parent); ++ unionfs_read_unlock(dentry->d_sb); ++ ++ return ret; ++} ++ ++static int unionfs_link(struct dentry *old_dentry, struct inode *dir, ++ struct dentry *new_dentry) ++{ ++ int err = 0; ++ struct dentry *lower_old_dentry = NULL; ++ struct dentry *lower_new_dentry = NULL; ++ struct dentry *lower_dir_dentry = NULL; ++ struct dentry *old_parent, *new_parent; ++ char *name = NULL; ++ bool valid; ++ ++ unionfs_read_lock(old_dentry->d_sb, UNIONFS_SMUTEX_CHILD); ++ old_parent = dget_parent(old_dentry); ++ new_parent = dget_parent(new_dentry); ++ unionfs_double_lock_parents(old_parent, new_parent); ++ unionfs_double_lock_dentry(old_dentry, new_dentry); ++ ++ valid = __unionfs_d_revalidate(old_dentry, old_parent, false); ++ if (unlikely(!valid)) { ++ err = -ESTALE; ++ goto out; ++ } ++ if (new_dentry->d_inode) { ++ valid = __unionfs_d_revalidate(new_dentry, new_parent, false); ++ if (unlikely(!valid)) { ++ err = -ESTALE; ++ goto out; ++ } ++ } ++ ++ lower_new_dentry = unionfs_lower_dentry(new_dentry); ++ ++ /* check for a whiteout in new dentry branch, and delete it */ ++ err = check_unlink_whiteout(new_dentry, lower_new_dentry, ++ dbstart(new_dentry)); ++ if (err > 0) { /* whiteout found and removed successfully */ ++ lower_dir_dentry = dget_parent(lower_new_dentry); ++ fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode); ++ dput(lower_dir_dentry); ++ dir->i_nlink = unionfs_get_nlinks(dir); ++ err = 0; ++ } ++ if (err) ++ goto out; ++ ++ /* check if parent hierachy is needed, then link in same branch */ ++ if (dbstart(old_dentry) != dbstart(new_dentry)) { ++ lower_new_dentry = create_parents(dir, new_dentry, ++ new_dentry->d_name.name, ++ dbstart(old_dentry)); ++ err = PTR_ERR(lower_new_dentry); ++ if (IS_COPYUP_ERR(err)) ++ goto docopyup; ++ if (!lower_new_dentry || IS_ERR(lower_new_dentry)) ++ goto out; ++ } ++ lower_new_dentry = unionfs_lower_dentry(new_dentry); ++ lower_old_dentry = unionfs_lower_dentry(old_dentry); ++ ++ BUG_ON(dbstart(old_dentry) != dbstart(new_dentry)); ++ lower_dir_dentry = lock_parent(lower_new_dentry); ++ err = is_robranch(old_dentry); ++ if (!err) { ++ /* see Documentation/filesystems/unionfs/issues.txt */ ++ lockdep_off(); ++ err = vfs_link(lower_old_dentry, lower_dir_dentry->d_inode, ++ lower_new_dentry); ++ lockdep_on(); ++ } ++ unlock_dir(lower_dir_dentry); ++ ++docopyup: ++ if (IS_COPYUP_ERR(err)) { ++ int old_bstart = dbstart(old_dentry); ++ int bindex; ++ ++ for (bindex = old_bstart - 1; bindex >= 0; bindex--) { ++ err = copyup_dentry(old_parent->d_inode, ++ old_dentry, old_bstart, ++ bindex, old_dentry->d_name.name, ++ old_dentry->d_name.len, NULL, ++ i_size_read(old_dentry->d_inode)); ++ if (err) ++ continue; ++ lower_new_dentry = ++ create_parents(dir, new_dentry, ++ new_dentry->d_name.name, ++ bindex); ++ lower_old_dentry = unionfs_lower_dentry(old_dentry); ++ lower_dir_dentry = lock_parent(lower_new_dentry); ++ /* see Documentation/filesystems/unionfs/issues.txt */ ++ lockdep_off(); ++ /* do vfs_link */ ++ err = vfs_link(lower_old_dentry, ++ lower_dir_dentry->d_inode, ++ lower_new_dentry); ++ lockdep_on(); ++ unlock_dir(lower_dir_dentry); ++ goto check_link; ++ } ++ goto out; ++ } ++ ++check_link: ++ if (err || !lower_new_dentry->d_inode) ++ goto out; ++ ++ /* Its a hard link, so use the same inode */ ++ new_dentry->d_inode = igrab(old_dentry->d_inode); ++ d_add(new_dentry, new_dentry->d_inode); ++ unionfs_copy_attr_all(dir, lower_new_dentry->d_parent->d_inode); ++ fsstack_copy_inode_size(dir, lower_new_dentry->d_parent->d_inode); ++ ++ /* propagate number of hard-links */ ++ old_dentry->d_inode->i_nlink = unionfs_get_nlinks(old_dentry->d_inode); ++ /* new dentry's ctime may have changed due to hard-link counts */ ++ unionfs_copy_attr_times(new_dentry->d_inode); ++ ++out: ++ if (!new_dentry->d_inode) ++ d_drop(new_dentry); ++ ++ kfree(name); ++ if (!err) ++ unionfs_postcopyup_setmnt(new_dentry); ++ ++ unionfs_check_inode(dir); ++ unionfs_check_dentry(new_dentry); ++ unionfs_check_dentry(old_dentry); ++ ++ unionfs_double_unlock_dentry(old_dentry, new_dentry); ++ unionfs_double_unlock_parents(old_parent, new_parent); ++ dput(new_parent); ++ dput(old_parent); ++ unionfs_read_unlock(old_dentry->d_sb); ++ ++ return err; ++} ++ ++static int unionfs_symlink(struct inode *dir, struct dentry *dentry, ++ const char *symname) ++{ ++ int err = 0; ++ struct dentry *lower_dentry = NULL; ++ struct dentry *wh_dentry = NULL; ++ struct dentry *lower_parent_dentry = NULL; ++ struct dentry *parent; ++ char *name = NULL; ++ int valid = 0; ++ umode_t mode; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD); ++ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ valid = __unionfs_d_revalidate(dentry, parent, false); ++ if (unlikely(!valid)) { ++ err = -ESTALE; ++ goto out; ++ } ++ ++ /* ++ * It's only a bug if this dentry was not negative and couldn't be ++ * revalidated (shouldn't happen). ++ */ ++ BUG_ON(!valid && dentry->d_inode); ++ ++ lower_dentry = find_writeable_branch(dir, dentry); ++ if (IS_ERR(lower_dentry)) { ++ err = PTR_ERR(lower_dentry); ++ goto out; ++ } ++ ++ lower_parent_dentry = lock_parent(lower_dentry); ++ if (IS_ERR(lower_parent_dentry)) { ++ err = PTR_ERR(lower_parent_dentry); ++ goto out_unlock; ++ } ++ ++ mode = S_IALLUGO; ++ err = vfs_symlink(lower_parent_dentry->d_inode, lower_dentry, symname); ++ if (!err) { ++ err = PTR_ERR(unionfs_interpose(dentry, dir->i_sb, 0)); ++ if (!err) { ++ unionfs_copy_attr_times(dir); ++ fsstack_copy_inode_size(dir, ++ lower_parent_dentry->d_inode); ++ /* update no. of links on parent directory */ ++ dir->i_nlink = unionfs_get_nlinks(dir); ++ } ++ } ++ ++out_unlock: ++ unlock_dir(lower_parent_dentry); ++out: ++ dput(wh_dentry); ++ kfree(name); ++ ++ if (!err) { ++ unionfs_postcopyup_setmnt(dentry); ++ unionfs_check_inode(dir); ++ unionfs_check_dentry(dentry); ++ } ++ unionfs_unlock_dentry(dentry); ++ unionfs_unlock_parent(dentry, parent); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} ++ ++static int unionfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) ++{ ++ int err = 0; ++ struct dentry *lower_dentry = NULL; ++ struct dentry *lower_parent_dentry = NULL; ++ struct dentry *parent; ++ int bindex = 0, bstart; ++ char *name = NULL; ++ int valid; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD); ++ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ valid = __unionfs_d_revalidate(dentry, parent, false); ++ if (unlikely(!valid)) { ++ err = -ESTALE; /* same as what real_lookup does */ ++ goto out; ++ } ++ ++ bstart = dbstart(dentry); ++ ++ lower_dentry = unionfs_lower_dentry(dentry); ++ ++ /* check for a whiteout in new dentry branch, and delete it */ ++ err = check_unlink_whiteout(dentry, lower_dentry, bstart); ++ if (err > 0) /* whiteout found and removed successfully */ ++ err = 0; ++ if (err) { ++ /* exit if the error returned was NOT -EROFS */ ++ if (!IS_COPYUP_ERR(err)) ++ goto out; ++ bstart--; ++ } ++ ++ /* check if copyup's needed, and mkdir */ ++ for (bindex = bstart; bindex >= 0; bindex--) { ++ int i; ++ int bend = dbend(dentry); ++ ++ if (is_robranch_super(dentry->d_sb, bindex)) ++ continue; ++ ++ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ if (!lower_dentry) { ++ lower_dentry = create_parents(dir, dentry, ++ dentry->d_name.name, ++ bindex); ++ if (!lower_dentry || IS_ERR(lower_dentry)) { ++ printk(KERN_ERR "unionfs: lower dentry " ++ " NULL for bindex = %d\n", bindex); ++ continue; ++ } ++ } ++ ++ lower_parent_dentry = lock_parent(lower_dentry); ++ ++ if (IS_ERR(lower_parent_dentry)) { ++ err = PTR_ERR(lower_parent_dentry); ++ goto out; ++ } ++ ++ err = vfs_mkdir(lower_parent_dentry->d_inode, lower_dentry, ++ mode); ++ ++ unlock_dir(lower_parent_dentry); ++ ++ /* did the mkdir succeed? */ ++ if (err) ++ break; ++ ++ for (i = bindex + 1; i <= bend; i++) { ++ /* XXX: use path_put_lowers? */ ++ if (unionfs_lower_dentry_idx(dentry, i)) { ++ dput(unionfs_lower_dentry_idx(dentry, i)); ++ unionfs_set_lower_dentry_idx(dentry, i, NULL); ++ } ++ } ++ dbend(dentry) = bindex; ++ ++ /* ++ * Only INTERPOSE_LOOKUP can return a value other than 0 on ++ * err. ++ */ ++ err = PTR_ERR(unionfs_interpose(dentry, dir->i_sb, 0)); ++ if (!err) { ++ unionfs_copy_attr_times(dir); ++ fsstack_copy_inode_size(dir, ++ lower_parent_dentry->d_inode); ++ ++ /* update number of links on parent directory */ ++ dir->i_nlink = unionfs_get_nlinks(dir); ++ } ++ ++ err = make_dir_opaque(dentry, dbstart(dentry)); ++ if (err) { ++ printk(KERN_ERR "unionfs: mkdir: error creating " ++ ".wh.__dir_opaque: %d\n", err); ++ goto out; ++ } ++ ++ /* we are done! */ ++ break; ++ } ++ ++out: ++ if (!dentry->d_inode) ++ d_drop(dentry); ++ ++ kfree(name); ++ ++ if (!err) { ++ unionfs_copy_attr_times(dentry->d_inode); ++ unionfs_postcopyup_setmnt(dentry); ++ } ++ unionfs_check_inode(dir); ++ unionfs_check_dentry(dentry); ++ unionfs_unlock_dentry(dentry); ++ unionfs_unlock_parent(dentry, parent); ++ unionfs_read_unlock(dentry->d_sb); ++ ++ return err; ++} ++ ++static int unionfs_mknod(struct inode *dir, struct dentry *dentry, int mode, ++ dev_t dev) ++{ ++ int err = 0; ++ struct dentry *lower_dentry = NULL; ++ struct dentry *wh_dentry = NULL; ++ struct dentry *lower_parent_dentry = NULL; ++ struct dentry *parent; ++ char *name = NULL; ++ int valid = 0; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD); ++ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ valid = __unionfs_d_revalidate(dentry, parent, false); ++ if (unlikely(!valid)) { ++ err = -ESTALE; ++ goto out; ++ } ++ ++ /* ++ * It's only a bug if this dentry was not negative and couldn't be ++ * revalidated (shouldn't happen). ++ */ ++ BUG_ON(!valid && dentry->d_inode); ++ ++ lower_dentry = find_writeable_branch(dir, dentry); ++ if (IS_ERR(lower_dentry)) { ++ err = PTR_ERR(lower_dentry); ++ goto out; ++ } ++ ++ lower_parent_dentry = lock_parent(lower_dentry); ++ if (IS_ERR(lower_parent_dentry)) { ++ err = PTR_ERR(lower_parent_dentry); ++ goto out_unlock; ++ } ++ ++ err = vfs_mknod(lower_parent_dentry->d_inode, lower_dentry, mode, dev); ++ if (!err) { ++ err = PTR_ERR(unionfs_interpose(dentry, dir->i_sb, 0)); ++ if (!err) { ++ unionfs_copy_attr_times(dir); ++ fsstack_copy_inode_size(dir, ++ lower_parent_dentry->d_inode); ++ /* update no. of links on parent directory */ ++ dir->i_nlink = unionfs_get_nlinks(dir); ++ } ++ } ++ ++out_unlock: ++ unlock_dir(lower_parent_dentry); ++out: ++ dput(wh_dentry); ++ kfree(name); ++ ++ if (!err) { ++ unionfs_postcopyup_setmnt(dentry); ++ unionfs_check_inode(dir); ++ unionfs_check_dentry(dentry); ++ } ++ unionfs_unlock_dentry(dentry); ++ unionfs_unlock_parent(dentry, parent); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} ++ ++/* requires sb, dentry, and parent to already be locked */ ++static int __unionfs_readlink(struct dentry *dentry, char __user *buf, ++ int bufsiz) ++{ ++ int err; ++ struct dentry *lower_dentry; ++ ++ lower_dentry = unionfs_lower_dentry(dentry); ++ ++ if (!lower_dentry->d_inode->i_op || ++ !lower_dentry->d_inode->i_op->readlink) { ++ err = -EINVAL; ++ goto out; ++ } ++ ++ err = lower_dentry->d_inode->i_op->readlink(lower_dentry, ++ buf, bufsiz); ++ if (err >= 0) ++ fsstack_copy_attr_atime(dentry->d_inode, ++ lower_dentry->d_inode); ++ ++out: ++ return err; ++} ++ ++static int unionfs_readlink(struct dentry *dentry, char __user *buf, ++ int bufsiz) ++{ ++ int err; ++ struct dentry *parent; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD); ++ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ if (unlikely(!__unionfs_d_revalidate(dentry, parent, false))) { ++ err = -ESTALE; ++ goto out; ++ } ++ ++ err = __unionfs_readlink(dentry, buf, bufsiz); ++ ++out: ++ unionfs_check_dentry(dentry); ++ unionfs_unlock_dentry(dentry); ++ unionfs_unlock_parent(dentry, parent); ++ unionfs_read_unlock(dentry->d_sb); ++ ++ return err; ++} ++ ++static void *unionfs_follow_link(struct dentry *dentry, struct nameidata *nd) ++{ ++ char *buf; ++ int len = PAGE_SIZE, err; ++ mm_segment_t old_fs; ++ struct dentry *parent; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD); ++ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ /* This is freed by the put_link method assuming a successful call. */ ++ buf = kmalloc(len, GFP_KERNEL); ++ if (unlikely(!buf)) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ /* read the symlink, and then we will follow it */ ++ old_fs = get_fs(); ++ set_fs(KERNEL_DS); ++ err = __unionfs_readlink(dentry, buf, len); ++ set_fs(old_fs); ++ if (err < 0) { ++ kfree(buf); ++ buf = NULL; ++ goto out; ++ } ++ buf[err] = 0; ++ nd_set_link(nd, buf); ++ err = 0; ++ ++out: ++ if (err >= 0) { ++ unionfs_check_nd(nd); ++ unionfs_check_dentry(dentry); ++ } ++ ++ unionfs_unlock_dentry(dentry); ++ unionfs_unlock_parent(dentry, parent); ++ unionfs_read_unlock(dentry->d_sb); ++ ++ return ERR_PTR(err); ++} ++ ++/* this @nd *IS* still used */ ++static void unionfs_put_link(struct dentry *dentry, struct nameidata *nd, ++ void *cookie) ++{ ++ struct dentry *parent; ++ char *buf; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD); ++ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ if (unlikely(!__unionfs_d_revalidate(dentry, parent, false))) ++ printk(KERN_ERR ++ "unionfs: put_link failed to revalidate dentry\n"); ++ ++ unionfs_check_dentry(dentry); ++#if 0 ++ /* XXX: can't run this check b/c this fxn can receive a poisoned 'nd' PTR */ ++ unionfs_check_nd(nd); ++#endif ++ buf = nd_get_link(nd); ++ if (!IS_ERR(buf)) ++ kfree(buf); ++ unionfs_unlock_dentry(dentry); ++ unionfs_unlock_parent(dentry, parent); ++ unionfs_read_unlock(dentry->d_sb); ++} ++ ++/* ++ * This is a variant of fs/namei.c:permission() or inode_permission() which ++ * skips over EROFS tests (because we perform copyup on EROFS). ++ */ ++static int __inode_permission(struct inode *inode, int mask) ++{ ++ int retval; ++ ++ /* nobody gets write access to an immutable file */ ++ if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode)) ++ return -EACCES; ++ ++ /* Ordinary permission routines do not understand MAY_APPEND. */ ++ if (inode->i_op && inode->i_op->permission) { ++ retval = inode->i_op->permission(inode, mask); ++ if (!retval) { ++ /* ++ * Exec permission on a regular file is denied if none ++ * of the execute bits are set. ++ * ++ * This check should be done by the ->permission() ++ * method. ++ */ ++ if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode) && ++ !(inode->i_mode & S_IXUGO)) ++ return -EACCES; ++ } ++ } else { ++ retval = generic_permission(inode, mask, NULL); ++ } ++ if (retval) ++ return retval; ++ ++ return security_inode_permission(inode, ++ mask & (MAY_READ|MAY_WRITE|MAY_EXEC|MAY_APPEND)); ++} ++ ++/* ++ * Don't grab the superblock read-lock in unionfs_permission, which prevents ++ * a deadlock with the branch-management "add branch" code (which grabbed ++ * the write lock). It is safe to not grab the read lock here, because even ++ * with branch management taking place, there is no chance that ++ * unionfs_permission, or anything it calls, will use stale branch ++ * information. ++ */ ++static int unionfs_permission(struct inode *inode, int mask) ++{ ++ struct inode *lower_inode = NULL; ++ int err = 0; ++ int bindex, bstart, bend; ++ const int is_file = !S_ISDIR(inode->i_mode); ++ const int write_mask = (mask & MAY_WRITE) && !(mask & MAY_READ); ++ struct inode *inode_grabbed = igrab(inode); ++ struct dentry *dentry = d_find_alias(inode); ++ ++ if (dentry) ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ if (!UNIONFS_I(inode)->lower_inodes) { ++ if (is_file) /* dirs can be unlinked but chdir'ed to */ ++ err = -ESTALE; /* force revalidate */ ++ goto out; ++ } ++ bstart = ibstart(inode); ++ bend = ibend(inode); ++ if (unlikely(bstart < 0 || bend < 0)) { ++ /* ++ * With branch-management, we can get a stale inode here. ++ * If so, we return ESTALE back to link_path_walk, which ++ * would discard the dcache entry and re-lookup the ++ * dentry+inode. This should be equivalent to issuing ++ * __unionfs_d_revalidate_chain on nd.dentry here. ++ */ ++ if (is_file) /* dirs can be unlinked but chdir'ed to */ ++ err = -ESTALE; /* force revalidate */ ++ goto out; ++ } ++ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ lower_inode = unionfs_lower_inode_idx(inode, bindex); ++ if (!lower_inode) ++ continue; ++ ++ /* ++ * check the condition for D-F-D underlying files/directories, ++ * we don't have to check for files, if we are checking for ++ * directories. ++ */ ++ if (!is_file && !S_ISDIR(lower_inode->i_mode)) ++ continue; ++ ++ /* ++ * We check basic permissions, but we ignore any conditions ++ * such as readonly file systems or branches marked as ++ * readonly, because those conditions should lead to a ++ * copyup taking place later on. However, if user never had ++ * access to the file, then no copyup could ever take place. ++ */ ++ err = __inode_permission(lower_inode, mask); ++ if (err && err != -EACCES && err != EPERM && bindex > 0) { ++ umode_t mode = lower_inode->i_mode; ++ if ((is_robranch_super(inode->i_sb, bindex) || ++ __is_rdonly(lower_inode)) && ++ (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) ++ err = 0; ++ if (IS_COPYUP_ERR(err)) ++ err = 0; ++ } ++ ++ /* ++ * NFS HACK: NFSv2/3 return EACCES on readonly-exported, ++ * locally readonly-mounted file systems, instead of EROFS ++ * like other file systems do. So we have no choice here ++ * but to intercept this and ignore it for NFS branches ++ * marked readonly. Specifically, we avoid using NFS's own ++ * "broken" ->permission method, and rely on ++ * generic_permission() to do basic checking for us. ++ */ ++ if (err && err == -EACCES && ++ is_robranch_super(inode->i_sb, bindex) && ++ lower_inode->i_sb->s_magic == NFS_SUPER_MAGIC) ++ err = generic_permission(lower_inode, mask, NULL); ++ ++ /* ++ * The permissions are an intersection of the overall directory ++ * permissions, so we fail if one fails. ++ */ ++ if (err) ++ goto out; ++ ++ /* only the leftmost file matters. */ ++ if (is_file || write_mask) { ++ if (is_file && write_mask) { ++ err = get_write_access(lower_inode); ++ if (!err) ++ put_write_access(lower_inode); ++ } ++ break; ++ } ++ } ++ /* sync times which may have changed (asynchronously) below */ ++ unionfs_copy_attr_times(inode); ++ ++out: ++ unionfs_check_inode(inode); ++ if (dentry) { ++ unionfs_unlock_dentry(dentry); ++ dput(dentry); ++ } ++ iput(inode_grabbed); ++ return err; ++} ++ ++static int unionfs_setattr(struct dentry *dentry, struct iattr *ia) ++{ ++ int err = 0; ++ struct dentry *lower_dentry; ++ struct dentry *parent; ++ struct inode *inode; ++ struct inode *lower_inode; ++ int bstart, bend, bindex; ++ loff_t size; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD); ++ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ if (unlikely(!__unionfs_d_revalidate(dentry, parent, false))) { ++ err = -ESTALE; ++ goto out; ++ } ++ ++ bstart = dbstart(dentry); ++ bend = dbend(dentry); ++ inode = dentry->d_inode; ++ ++ /* ++ * mode change is for clearing setuid/setgid. Allow lower filesystem ++ * to reinterpret it in its own way. ++ */ ++ if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) ++ ia->ia_valid &= ~ATTR_MODE; ++ ++ lower_dentry = unionfs_lower_dentry(dentry); ++ if (!lower_dentry) { /* should never happen after above revalidate */ ++ err = -EINVAL; ++ goto out; ++ } ++ lower_inode = unionfs_lower_inode(inode); ++ ++ /* check if user has permission to change lower inode */ ++ err = inode_change_ok(lower_inode, ia); ++ if (err) ++ goto out; ++ ++ /* copyup if the file is on a read only branch */ ++ if (is_robranch_super(dentry->d_sb, bstart) ++ || __is_rdonly(lower_inode)) { ++ /* check if we have a branch to copy up to */ ++ if (bstart <= 0) { ++ err = -EACCES; ++ goto out; ++ } ++ ++ if (ia->ia_valid & ATTR_SIZE) ++ size = ia->ia_size; ++ else ++ size = i_size_read(inode); ++ /* copyup to next available branch */ ++ for (bindex = bstart - 1; bindex >= 0; bindex--) { ++ err = copyup_dentry(parent->d_inode, ++ dentry, bstart, bindex, ++ dentry->d_name.name, ++ dentry->d_name.len, ++ NULL, size); ++ if (!err) ++ break; ++ } ++ if (err) ++ goto out; ++ /* get updated lower_dentry/inode after copyup */ ++ lower_dentry = unionfs_lower_dentry(dentry); ++ lower_inode = unionfs_lower_inode(inode); ++ } ++ ++ /* ++ * If shrinking, first truncate upper level to cancel writing dirty ++ * pages beyond the new eof; and also if its' maxbytes is more ++ * limiting (fail with -EFBIG before making any change to the lower ++ * level). There is no need to vmtruncate the upper level ++ * afterwards in the other cases: we fsstack_copy_inode_size from ++ * the lower level. ++ */ ++ if (ia->ia_valid & ATTR_SIZE) { ++ size = i_size_read(inode); ++ if (ia->ia_size < size || (ia->ia_size > size && ++ inode->i_sb->s_maxbytes < lower_inode->i_sb->s_maxbytes)) { ++ err = vmtruncate(inode, ia->ia_size); ++ if (err) ++ goto out; ++ } ++ } ++ ++ /* notify the (possibly copied-up) lower inode */ ++ /* ++ * Note: we use lower_dentry->d_inode, because lower_inode may be ++ * unlinked (no inode->i_sb and i_ino==0. This happens if someone ++ * tries to open(), unlink(), then ftruncate() a file. ++ */ ++ mutex_lock(&lower_dentry->d_inode->i_mutex); ++ err = notify_change(lower_dentry, ia); ++ mutex_unlock(&lower_dentry->d_inode->i_mutex); ++ if (err) ++ goto out; ++ ++ /* get attributes from the first lower inode */ ++ if (ibstart(inode) >= 0) ++ unionfs_copy_attr_all(inode, lower_inode); ++ /* ++ * unionfs_copy_attr_all will copy the lower times to our inode if ++ * the lower ones are newer (useful for cache coherency). However, ++ * ->setattr is the only place in which we may have to copy the ++ * lower inode times absolutely, to support utimes(2). ++ */ ++ if (ia->ia_valid & ATTR_MTIME_SET) ++ inode->i_mtime = lower_inode->i_mtime; ++ if (ia->ia_valid & ATTR_CTIME) ++ inode->i_ctime = lower_inode->i_ctime; ++ if (ia->ia_valid & ATTR_ATIME_SET) ++ inode->i_atime = lower_inode->i_atime; ++ fsstack_copy_inode_size(inode, lower_inode); ++ ++out: ++ if (!err) ++ unionfs_check_dentry(dentry); ++ unionfs_unlock_dentry(dentry); ++ unionfs_unlock_parent(dentry, parent); ++ unionfs_read_unlock(dentry->d_sb); ++ ++ return err; ++} ++ ++struct inode_operations unionfs_symlink_iops = { ++ .readlink = unionfs_readlink, ++ .permission = unionfs_permission, ++ .follow_link = unionfs_follow_link, ++ .setattr = unionfs_setattr, ++ .put_link = unionfs_put_link, ++}; ++ ++struct inode_operations unionfs_dir_iops = { ++ .create = unionfs_create, ++ .lookup = unionfs_lookup, ++ .link = unionfs_link, ++ .unlink = unionfs_unlink, ++ .symlink = unionfs_symlink, ++ .mkdir = unionfs_mkdir, ++ .rmdir = unionfs_rmdir, ++ .mknod = unionfs_mknod, ++ .rename = unionfs_rename, ++ .permission = unionfs_permission, ++ .setattr = unionfs_setattr, ++#ifdef CONFIG_UNION_FS_XATTR ++ .setxattr = unionfs_setxattr, ++ .getxattr = unionfs_getxattr, ++ .removexattr = unionfs_removexattr, ++ .listxattr = unionfs_listxattr, ++#endif /* CONFIG_UNION_FS_XATTR */ ++}; ++ ++struct inode_operations unionfs_main_iops = { ++ .permission = unionfs_permission, ++ .setattr = unionfs_setattr, ++#ifdef CONFIG_UNION_FS_XATTR ++ .setxattr = unionfs_setxattr, ++ .getxattr = unionfs_getxattr, ++ .removexattr = unionfs_removexattr, ++ .listxattr = unionfs_listxattr, ++#endif /* CONFIG_UNION_FS_XATTR */ ++}; +diff --git a/fs/unionfs/lookup.c b/fs/unionfs/lookup.c +new file mode 100644 +index 0000000..b63c17e +--- /dev/null ++++ b/fs/unionfs/lookup.c +@@ -0,0 +1,569 @@ ++/* ++ * Copyright (c) 2003-2010 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2010 Stony Brook University ++ * Copyright (c) 2003-2010 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++ ++/* ++ * Lookup one path component @name relative to a <base,mnt> path pair. ++ * Behaves nearly the same as lookup_one_len (i.e., return negative dentry ++ * on ENOENT), but uses the @mnt passed, so it can cross bind mounts and ++ * other lower mounts properly. If @new_mnt is non-null, will fill in the ++ * new mnt there. Caller is responsible to dput/mntput/path_put returned ++ * @dentry and @new_mnt. ++ */ ++struct dentry *__lookup_one(struct dentry *base, struct vfsmount *mnt, ++ const char *name, struct vfsmount **new_mnt) ++{ ++ struct dentry *dentry = NULL; ++ struct nameidata lower_nd; ++ int err; ++ ++ /* we use flags=0 to get basic lookup */ ++ err = vfs_path_lookup(base, mnt, name, 0, &lower_nd); ++ ++ switch (err) { ++ case 0: /* no error */ ++ dentry = lower_nd.path.dentry; ++ if (new_mnt) ++ *new_mnt = lower_nd.path.mnt; /* rc already inc'ed */ ++ break; ++ case -ENOENT: ++ /* ++ * We don't consider ENOENT an error, and we want to return ++ * a negative dentry (ala lookup_one_len). As we know ++ * there was no inode for this name before (-ENOENT), then ++ * it's safe to call lookup_one_len (which doesn't take a ++ * vfsmount). ++ */ ++ dentry = lookup_lck_len(name, base, strlen(name)); ++ if (new_mnt) ++ *new_mnt = mntget(lower_nd.path.mnt); ++ break; ++ default: /* all other real errors */ ++ dentry = ERR_PTR(err); ++ break; ++ } ++ ++ return dentry; ++} ++ ++/* ++ * This is a utility function that fills in a unionfs dentry. ++ * Caller must lock this dentry with unionfs_lock_dentry. ++ * ++ * Returns: 0 (ok), or -ERRNO if an error occurred. ++ * XXX: get rid of _partial_lookup and make callers call _lookup_full directly ++ */ ++int unionfs_partial_lookup(struct dentry *dentry, struct dentry *parent) ++{ ++ struct dentry *tmp; ++ int err = -ENOSYS; ++ ++ tmp = unionfs_lookup_full(dentry, parent, INTERPOSE_PARTIAL); ++ ++ if (!tmp) { ++ err = 0; ++ goto out; ++ } ++ if (IS_ERR(tmp)) { ++ err = PTR_ERR(tmp); ++ goto out; ++ } ++ /* XXX: need to change the interface */ ++ BUG_ON(tmp != dentry); ++out: ++ return err; ++} ++ ++/* The dentry cache is just so we have properly sized dentries. */ ++static struct kmem_cache *unionfs_dentry_cachep; ++int unionfs_init_dentry_cache(void) ++{ ++ unionfs_dentry_cachep = ++ kmem_cache_create("unionfs_dentry", ++ sizeof(struct unionfs_dentry_info), ++ 0, SLAB_RECLAIM_ACCOUNT, NULL); ++ ++ return (unionfs_dentry_cachep ? 0 : -ENOMEM); ++} ++ ++void unionfs_destroy_dentry_cache(void) ++{ ++ if (unionfs_dentry_cachep) ++ kmem_cache_destroy(unionfs_dentry_cachep); ++} ++ ++void free_dentry_private_data(struct dentry *dentry) ++{ ++ if (!dentry || !dentry->d_fsdata) ++ return; ++ kfree(UNIONFS_D(dentry)->lower_paths); ++ UNIONFS_D(dentry)->lower_paths = NULL; ++ kmem_cache_free(unionfs_dentry_cachep, dentry->d_fsdata); ++ dentry->d_fsdata = NULL; ++} ++ ++static inline int __realloc_dentry_private_data(struct dentry *dentry) ++{ ++ struct unionfs_dentry_info *info = UNIONFS_D(dentry); ++ void *p; ++ int size; ++ ++ BUG_ON(!info); ++ ++ size = sizeof(struct path) * sbmax(dentry->d_sb); ++ p = krealloc(info->lower_paths, size, GFP_ATOMIC); ++ if (unlikely(!p)) ++ return -ENOMEM; ++ ++ info->lower_paths = p; ++ ++ info->bstart = -1; ++ info->bend = -1; ++ info->bopaque = -1; ++ info->bcount = sbmax(dentry->d_sb); ++ atomic_set(&info->generation, ++ atomic_read(&UNIONFS_SB(dentry->d_sb)->generation)); ++ ++ memset(info->lower_paths, 0, size); ++ ++ return 0; ++} ++ ++/* UNIONFS_D(dentry)->lock must be locked */ ++int realloc_dentry_private_data(struct dentry *dentry) ++{ ++ if (!__realloc_dentry_private_data(dentry)) ++ return 0; ++ ++ kfree(UNIONFS_D(dentry)->lower_paths); ++ free_dentry_private_data(dentry); ++ return -ENOMEM; ++} ++ ++/* allocate new dentry private data */ ++int new_dentry_private_data(struct dentry *dentry, int subclass) ++{ ++ struct unionfs_dentry_info *info = UNIONFS_D(dentry); ++ ++ BUG_ON(info); ++ ++ info = kmem_cache_alloc(unionfs_dentry_cachep, GFP_ATOMIC); ++ if (unlikely(!info)) ++ return -ENOMEM; ++ ++ mutex_init(&info->lock); ++ mutex_lock_nested(&info->lock, subclass); ++ ++ info->lower_paths = NULL; ++ ++ dentry->d_fsdata = info; ++ ++ if (!__realloc_dentry_private_data(dentry)) ++ return 0; ++ ++ mutex_unlock(&info->lock); ++ free_dentry_private_data(dentry); ++ return -ENOMEM; ++} ++ ++/* ++ * scan through the lower dentry objects, and set bstart to reflect the ++ * starting branch ++ */ ++void update_bstart(struct dentry *dentry) ++{ ++ int bindex; ++ int bstart = dbstart(dentry); ++ int bend = dbend(dentry); ++ struct dentry *lower_dentry; ++ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ if (!lower_dentry) ++ continue; ++ if (lower_dentry->d_inode) { ++ dbstart(dentry) = bindex; ++ break; ++ } ++ dput(lower_dentry); ++ unionfs_set_lower_dentry_idx(dentry, bindex, NULL); ++ } ++} ++ ++ ++/* ++ * Initialize a nameidata structure (the intent part) we can pass to a lower ++ * file system. Returns 0 on success or -error (only -ENOMEM possible). ++ * Inside that nd structure, this function may also return an allocated ++ * struct file (for open intents). The caller, when done with this nd, must ++ * kfree the intent file (using release_lower_nd). ++ * ++ * XXX: this code, and the callers of this code, should be redone using ++ * vfs_path_lookup() when (1) the nameidata structure is refactored into a ++ * separate intent-structure, and (2) open_namei() is broken into a VFS-only ++ * function and a method that other file systems can call. ++ */ ++int init_lower_nd(struct nameidata *nd, unsigned int flags) ++{ ++ int err = 0; ++#ifdef ALLOC_LOWER_ND_FILE ++ /* ++ * XXX: one day we may need to have the lower return an open file ++ * for us. It is not needed in 2.6.23-rc1 for nfs2/nfs3, but may ++ * very well be needed for nfs4. ++ */ ++ struct file *file; ++#endif /* ALLOC_LOWER_ND_FILE */ ++ ++ memset(nd, 0, sizeof(struct nameidata)); ++ if (!flags) ++ return err; ++ ++ switch (flags) { ++ case LOOKUP_CREATE: ++ nd->intent.open.flags |= O_CREAT; ++ /* fall through: shared code for create/open cases */ ++ case LOOKUP_OPEN: ++ nd->flags = flags; ++ nd->intent.open.flags |= (FMODE_READ | FMODE_WRITE); ++#ifdef ALLOC_LOWER_ND_FILE ++ file = kzalloc(sizeof(struct file), GFP_KERNEL); ++ if (unlikely(!file)) { ++ err = -ENOMEM; ++ break; /* exit switch statement and thus return */ ++ } ++ nd->intent.open.file = file; ++#endif /* ALLOC_LOWER_ND_FILE */ ++ break; ++ default: ++ /* ++ * We should never get here, for now. ++ * We can add new cases here later on. ++ */ ++ pr_debug("unionfs: unknown nameidata flag 0x%x\n", flags); ++ BUG(); ++ break; ++ } ++ ++ return err; ++} ++ ++void release_lower_nd(struct nameidata *nd, int err) ++{ ++ if (!nd->intent.open.file) ++ return; ++ else if (!err) ++ release_open_intent(nd); ++#ifdef ALLOC_LOWER_ND_FILE ++ kfree(nd->intent.open.file); ++#endif /* ALLOC_LOWER_ND_FILE */ ++} ++ ++/* ++ * Main (and complex) driver function for Unionfs's lookup ++ * ++ * Returns: NULL (ok), ERR_PTR if an error occurred, or a non-null non-error ++ * PTR if d_splice returned a different dentry. ++ * ++ * If lookupmode is INTERPOSE_PARTIAL/REVAL/REVAL_NEG, the passed dentry's ++ * inode info must be locked. If lookupmode is INTERPOSE_LOOKUP (i.e., a ++ * newly looked-up dentry), then unionfs_lookup_backend will return a locked ++ * dentry's info, which the caller must unlock. ++ */ ++struct dentry *unionfs_lookup_full(struct dentry *dentry, ++ struct dentry *parent, int lookupmode) ++{ ++ int err = 0; ++ struct dentry *lower_dentry = NULL; ++ struct vfsmount *lower_mnt; ++ struct vfsmount *lower_dir_mnt; ++ struct dentry *wh_lower_dentry = NULL; ++ struct dentry *lower_dir_dentry = NULL; ++ struct dentry *d_interposed = NULL; ++ int bindex, bstart, bend, bopaque; ++ int opaque, num_positive = 0; ++ const char *name; ++ int namelen; ++ int pos_start, pos_end; ++ ++ /* ++ * We should already have a lock on this dentry in the case of a ++ * partial lookup, or a revalidation. Otherwise it is returned from ++ * new_dentry_private_data already locked. ++ */ ++ verify_locked(dentry); ++ verify_locked(parent); ++ ++ /* must initialize dentry operations */ ++ dentry->d_op = &unionfs_dops; ++ ++ /* We never partial lookup the root directory. */ ++ if (IS_ROOT(dentry)) ++ goto out; ++ ++ name = dentry->d_name.name; ++ namelen = dentry->d_name.len; ++ ++ /* No dentries should get created for possible whiteout names. */ ++ if (!is_validname(name)) { ++ err = -EPERM; ++ goto out_free; ++ } ++ ++ /* Now start the actual lookup procedure. */ ++ bstart = dbstart(parent); ++ bend = dbend(parent); ++ bopaque = dbopaque(parent); ++ BUG_ON(bstart < 0); ++ ++ /* adjust bend to bopaque if needed */ ++ if ((bopaque >= 0) && (bopaque < bend)) ++ bend = bopaque; ++ ++ /* lookup all possible dentries */ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ ++ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ lower_mnt = unionfs_lower_mnt_idx(dentry, bindex); ++ ++ /* skip if we already have a positive lower dentry */ ++ if (lower_dentry) { ++ if (dbstart(dentry) < 0) ++ dbstart(dentry) = bindex; ++ if (bindex > dbend(dentry)) ++ dbend(dentry) = bindex; ++ if (lower_dentry->d_inode) ++ num_positive++; ++ continue; ++ } ++ ++ lower_dir_dentry = ++ unionfs_lower_dentry_idx(parent, bindex); ++ /* if the lower dentry's parent does not exist, skip this */ ++ if (!lower_dir_dentry || !lower_dir_dentry->d_inode) ++ continue; ++ ++ /* also skip it if the parent isn't a directory. */ ++ if (!S_ISDIR(lower_dir_dentry->d_inode->i_mode)) ++ continue; /* XXX: should be BUG_ON */ ++ ++ /* check for whiteouts: stop lookup if found */ ++ wh_lower_dentry = lookup_whiteout(name, lower_dir_dentry); ++ if (IS_ERR(wh_lower_dentry)) { ++ err = PTR_ERR(wh_lower_dentry); ++ goto out_free; ++ } ++ if (wh_lower_dentry->d_inode) { ++ dbend(dentry) = dbopaque(dentry) = bindex; ++ if (dbstart(dentry) < 0) ++ dbstart(dentry) = bindex; ++ dput(wh_lower_dentry); ++ break; ++ } ++ dput(wh_lower_dentry); ++ ++ /* Now do regular lookup; lookup @name */ ++ lower_dir_mnt = unionfs_lower_mnt_idx(parent, bindex); ++ lower_mnt = NULL; /* XXX: needed? */ ++ ++ lower_dentry = __lookup_one(lower_dir_dentry, lower_dir_mnt, ++ name, &lower_mnt); ++ ++ if (IS_ERR(lower_dentry)) { ++ err = PTR_ERR(lower_dentry); ++ goto out_free; ++ } ++ unionfs_set_lower_dentry_idx(dentry, bindex, lower_dentry); ++ if (!lower_mnt) ++ lower_mnt = unionfs_mntget(dentry->d_sb->s_root, ++ bindex); ++ unionfs_set_lower_mnt_idx(dentry, bindex, lower_mnt); ++ ++ /* adjust dbstart/end */ ++ if (dbstart(dentry) < 0) ++ dbstart(dentry) = bindex; ++ if (bindex > dbend(dentry)) ++ dbend(dentry) = bindex; ++ /* ++ * We always store the lower dentries above, and update ++ * dbstart/dbend, even if the whole unionfs dentry is ++ * negative (i.e., no lower inodes). ++ */ ++ if (!lower_dentry->d_inode) ++ continue; ++ num_positive++; ++ ++ /* ++ * check if we just found an opaque directory, if so, stop ++ * lookups here. ++ */ ++ if (!S_ISDIR(lower_dentry->d_inode->i_mode)) ++ continue; ++ opaque = is_opaque_dir(dentry, bindex); ++ if (opaque < 0) { ++ err = opaque; ++ goto out_free; ++ } else if (opaque) { ++ dbend(dentry) = dbopaque(dentry) = bindex; ++ break; ++ } ++ dbend(dentry) = bindex; ++ ++ /* update parent directory's atime with the bindex */ ++ fsstack_copy_attr_atime(parent->d_inode, ++ lower_dir_dentry->d_inode); ++ } ++ ++ /* sanity checks, then decide if to process a negative dentry */ ++ BUG_ON(dbstart(dentry) < 0 && dbend(dentry) >= 0); ++ BUG_ON(dbstart(dentry) >= 0 && dbend(dentry) < 0); ++ ++ if (num_positive > 0) ++ goto out_positive; ++ ++ /*** handle NEGATIVE dentries ***/ ++ ++ /* ++ * If negative, keep only first lower negative dentry, to save on ++ * memory. ++ */ ++ if (dbstart(dentry) < dbend(dentry)) { ++ path_put_lowers(dentry, dbstart(dentry) + 1, ++ dbend(dentry), false); ++ dbend(dentry) = dbstart(dentry); ++ } ++ if (lookupmode == INTERPOSE_PARTIAL) ++ goto out; ++ if (lookupmode == INTERPOSE_LOOKUP) { ++ /* ++ * If all we found was a whiteout in the first available ++ * branch, then create a negative dentry for a possibly new ++ * file to be created. ++ */ ++ if (dbopaque(dentry) < 0) ++ goto out; ++ /* XXX: need to get mnt here */ ++ bindex = dbstart(dentry); ++ if (unionfs_lower_dentry_idx(dentry, bindex)) ++ goto out; ++ lower_dir_dentry = ++ unionfs_lower_dentry_idx(parent, bindex); ++ if (!lower_dir_dentry || !lower_dir_dentry->d_inode) ++ goto out; ++ if (!S_ISDIR(lower_dir_dentry->d_inode->i_mode)) ++ goto out; /* XXX: should be BUG_ON */ ++ /* XXX: do we need to cross bind mounts here? */ ++ lower_dentry = lookup_lck_len(name, lower_dir_dentry, namelen); ++ if (IS_ERR(lower_dentry)) { ++ err = PTR_ERR(lower_dentry); ++ goto out; ++ } ++ /* XXX: need to mntget/mntput as needed too! */ ++ unionfs_set_lower_dentry_idx(dentry, bindex, lower_dentry); ++ /* XXX: wrong mnt for crossing bind mounts! */ ++ lower_mnt = unionfs_mntget(dentry->d_sb->s_root, bindex); ++ unionfs_set_lower_mnt_idx(dentry, bindex, lower_mnt); ++ ++ goto out; ++ } ++ ++ /* if we're revalidating a positive dentry, don't make it negative */ ++ if (lookupmode != INTERPOSE_REVAL) ++ d_add(dentry, NULL); ++ ++ goto out; ++ ++out_positive: ++ /*** handle POSITIVE dentries ***/ ++ ++ /* ++ * This unionfs dentry is positive (at least one lower inode ++ * exists), so scan entire dentry from beginning to end, and remove ++ * any negative lower dentries, if any. Then, update dbstart/dbend ++ * to reflect the start/end of positive dentries. ++ */ ++ pos_start = pos_end = -1; ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ lower_dentry = unionfs_lower_dentry_idx(dentry, ++ bindex); ++ if (lower_dentry && lower_dentry->d_inode) { ++ if (pos_start < 0) ++ pos_start = bindex; ++ if (bindex > pos_end) ++ pos_end = bindex; ++ continue; ++ } ++ path_put_lowers(dentry, bindex, bindex, false); ++ } ++ if (pos_start >= 0) ++ dbstart(dentry) = pos_start; ++ if (pos_end >= 0) ++ dbend(dentry) = pos_end; ++ ++ /* Partial lookups need to re-interpose, or throw away older negs. */ ++ if (lookupmode == INTERPOSE_PARTIAL) { ++ if (dentry->d_inode) { ++ unionfs_reinterpose(dentry); ++ goto out; ++ } ++ ++ /* ++ * This dentry was positive, so it is as if we had a ++ * negative revalidation. ++ */ ++ lookupmode = INTERPOSE_REVAL_NEG; ++ update_bstart(dentry); ++ } ++ ++ /* ++ * Interpose can return a dentry if d_splice returned a different ++ * dentry. ++ */ ++ d_interposed = unionfs_interpose(dentry, dentry->d_sb, lookupmode); ++ if (IS_ERR(d_interposed)) ++ err = PTR_ERR(d_interposed); ++ else if (d_interposed) ++ dentry = d_interposed; ++ ++ if (!err) ++ goto out; ++ d_drop(dentry); ++ ++out_free: ++ /* should dput/mntput all the underlying dentries on error condition */ ++ if (dbstart(dentry) >= 0) ++ path_put_lowers_all(dentry, false); ++ /* free lower_paths unconditionally */ ++ kfree(UNIONFS_D(dentry)->lower_paths); ++ UNIONFS_D(dentry)->lower_paths = NULL; ++ ++out: ++ if (dentry && UNIONFS_D(dentry)) { ++ BUG_ON(dbstart(dentry) < 0 && dbend(dentry) >= 0); ++ BUG_ON(dbstart(dentry) >= 0 && dbend(dentry) < 0); ++ } ++ if (d_interposed && UNIONFS_D(d_interposed)) { ++ BUG_ON(dbstart(d_interposed) < 0 && dbend(d_interposed) >= 0); ++ BUG_ON(dbstart(d_interposed) >= 0 && dbend(d_interposed) < 0); ++ } ++ ++ if (!err && d_interposed) ++ return d_interposed; ++ return ERR_PTR(err); ++} +diff --git a/fs/unionfs/main.c b/fs/unionfs/main.c +new file mode 100644 +index 0000000..258386e +--- /dev/null ++++ b/fs/unionfs/main.c +@@ -0,0 +1,758 @@ ++/* ++ * Copyright (c) 2003-2010 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2010 Stony Brook University ++ * Copyright (c) 2003-2010 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++#include <linux/module.h> ++#include <linux/moduleparam.h> ++ ++static void unionfs_fill_inode(struct dentry *dentry, ++ struct inode *inode) ++{ ++ struct inode *lower_inode; ++ struct dentry *lower_dentry; ++ int bindex, bstart, bend; ++ ++ bstart = dbstart(dentry); ++ bend = dbend(dentry); ++ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ if (!lower_dentry) { ++ unionfs_set_lower_inode_idx(inode, bindex, NULL); ++ continue; ++ } ++ ++ /* Initialize the lower inode to the new lower inode. */ ++ if (!lower_dentry->d_inode) ++ continue; ++ ++ unionfs_set_lower_inode_idx(inode, bindex, ++ igrab(lower_dentry->d_inode)); ++ } ++ ++ ibstart(inode) = dbstart(dentry); ++ ibend(inode) = dbend(dentry); ++ ++ /* Use attributes from the first branch. */ ++ lower_inode = unionfs_lower_inode(inode); ++ ++ /* Use different set of inode ops for symlinks & directories */ ++ if (S_ISLNK(lower_inode->i_mode)) ++ inode->i_op = &unionfs_symlink_iops; ++ else if (S_ISDIR(lower_inode->i_mode)) ++ inode->i_op = &unionfs_dir_iops; ++ ++ /* Use different set of file ops for directories */ ++ if (S_ISDIR(lower_inode->i_mode)) ++ inode->i_fop = &unionfs_dir_fops; ++ ++ /* properly initialize special inodes */ ++ if (S_ISBLK(lower_inode->i_mode) || S_ISCHR(lower_inode->i_mode) || ++ S_ISFIFO(lower_inode->i_mode) || S_ISSOCK(lower_inode->i_mode)) ++ init_special_inode(inode, lower_inode->i_mode, ++ lower_inode->i_rdev); ++ ++ /* all well, copy inode attributes */ ++ unionfs_copy_attr_all(inode, lower_inode); ++ fsstack_copy_inode_size(inode, lower_inode); ++} ++ ++/* ++ * Connect a unionfs inode dentry/inode with several lower ones. This is ++ * the classic stackable file system "vnode interposition" action. ++ * ++ * @sb: unionfs's super_block ++ */ ++struct dentry *unionfs_interpose(struct dentry *dentry, struct super_block *sb, ++ int flag) ++{ ++ int err = 0; ++ struct inode *inode; ++ int need_fill_inode = 1; ++ struct dentry *spliced = NULL; ++ ++ verify_locked(dentry); ++ ++ /* ++ * We allocate our new inode below by calling unionfs_iget, ++ * which will initialize some of the new inode's fields ++ */ ++ ++ /* ++ * On revalidate we've already got our own inode and just need ++ * to fix it up. ++ */ ++ if (flag == INTERPOSE_REVAL) { ++ inode = dentry->d_inode; ++ UNIONFS_I(inode)->bstart = -1; ++ UNIONFS_I(inode)->bend = -1; ++ atomic_set(&UNIONFS_I(inode)->generation, ++ atomic_read(&UNIONFS_SB(sb)->generation)); ++ ++ UNIONFS_I(inode)->lower_inodes = ++ kcalloc(sbmax(sb), sizeof(struct inode *), GFP_KERNEL); ++ if (unlikely(!UNIONFS_I(inode)->lower_inodes)) { ++ err = -ENOMEM; ++ goto out; ++ } ++ } else { ++ /* get unique inode number for unionfs */ ++ inode = unionfs_iget(sb, iunique(sb, UNIONFS_ROOT_INO)); ++ if (IS_ERR(inode)) { ++ err = PTR_ERR(inode); ++ goto out; ++ } ++ if (atomic_read(&inode->i_count) > 1) ++ goto skip; ++ } ++ ++ need_fill_inode = 0; ++ unionfs_fill_inode(dentry, inode); ++ ++skip: ++ /* only (our) lookup wants to do a d_add */ ++ switch (flag) { ++ case INTERPOSE_DEFAULT: ++ /* for operations which create new inodes */ ++ d_add(dentry, inode); ++ break; ++ case INTERPOSE_REVAL_NEG: ++ d_instantiate(dentry, inode); ++ break; ++ case INTERPOSE_LOOKUP: ++ spliced = d_splice_alias(inode, dentry); ++ if (spliced && spliced != dentry) { ++ /* ++ * d_splice can return a dentry if it was ++ * disconnected and had to be moved. We must ensure ++ * that the private data of the new dentry is ++ * correct and that the inode info was filled ++ * properly. Finally we must return this new ++ * dentry. ++ */ ++ spliced->d_op = &unionfs_dops; ++ spliced->d_fsdata = dentry->d_fsdata; ++ dentry->d_fsdata = NULL; ++ dentry = spliced; ++ if (need_fill_inode) { ++ need_fill_inode = 0; ++ unionfs_fill_inode(dentry, inode); ++ } ++ goto out_spliced; ++ } else if (!spliced) { ++ if (need_fill_inode) { ++ need_fill_inode = 0; ++ unionfs_fill_inode(dentry, inode); ++ goto out_spliced; ++ } ++ } ++ break; ++ case INTERPOSE_REVAL: ++ /* Do nothing. */ ++ break; ++ default: ++ printk(KERN_CRIT "unionfs: invalid interpose flag passed!\n"); ++ BUG(); ++ } ++ goto out; ++ ++out_spliced: ++ if (!err) ++ return spliced; ++out: ++ return ERR_PTR(err); ++} ++ ++/* like interpose above, but for an already existing dentry */ ++void unionfs_reinterpose(struct dentry *dentry) ++{ ++ struct dentry *lower_dentry; ++ struct inode *inode; ++ int bindex, bstart, bend; ++ ++ verify_locked(dentry); ++ ++ /* This is pre-allocated inode */ ++ inode = dentry->d_inode; ++ ++ bstart = dbstart(dentry); ++ bend = dbend(dentry); ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ if (!lower_dentry) ++ continue; ++ ++ if (!lower_dentry->d_inode) ++ continue; ++ if (unionfs_lower_inode_idx(inode, bindex)) ++ continue; ++ unionfs_set_lower_inode_idx(inode, bindex, ++ igrab(lower_dentry->d_inode)); ++ } ++ ibstart(inode) = dbstart(dentry); ++ ibend(inode) = dbend(dentry); ++} ++ ++/* ++ * make sure the branch we just looked up (nd) makes sense: ++ * ++ * 1) we're not trying to stack unionfs on top of unionfs ++ * 2) it exists ++ * 3) is a directory ++ */ ++int check_branch(struct nameidata *nd) ++{ ++ /* XXX: remove in ODF code -- stacking unions allowed there */ ++ if (!strcmp(nd->path.dentry->d_sb->s_type->name, UNIONFS_NAME)) ++ return -EINVAL; ++ if (!nd->path.dentry->d_inode) ++ return -ENOENT; ++ if (!S_ISDIR(nd->path.dentry->d_inode->i_mode)) ++ return -ENOTDIR; ++ return 0; ++} ++ ++/* checks if two lower_dentries have overlapping branches */ ++static int is_branch_overlap(struct dentry *dent1, struct dentry *dent2) ++{ ++ struct dentry *dent = NULL; ++ ++ dent = dent1; ++ while ((dent != dent2) && (dent->d_parent != dent)) ++ dent = dent->d_parent; ++ ++ if (dent == dent2) ++ return 1; ++ ++ dent = dent2; ++ while ((dent != dent1) && (dent->d_parent != dent)) ++ dent = dent->d_parent; ++ ++ return (dent == dent1); ++} ++ ++/* ++ * Parse "ro" or "rw" options, but default to "rw" if no mode options was ++ * specified. Fill the mode bits in @perms. If encounter an unknown ++ * string, return -EINVAL. Otherwise return 0. ++ */ ++int parse_branch_mode(const char *name, int *perms) ++{ ++ if (!name || !strcmp(name, "rw")) { ++ *perms = MAY_READ | MAY_WRITE; ++ return 0; ++ } ++ if (!strcmp(name, "ro")) { ++ *perms = MAY_READ; ++ return 0; ++ } ++ return -EINVAL; ++} ++ ++/* ++ * parse the dirs= mount argument ++ * ++ * We don't need to lock the superblock private data's rwsem, as we get ++ * called only by unionfs_read_super - it is still a long time before anyone ++ * can even get a reference to us. ++ */ ++static int parse_dirs_option(struct super_block *sb, struct unionfs_dentry_info ++ *lower_root_info, char *options) ++{ ++ struct nameidata nd; ++ char *name; ++ int err = 0; ++ int branches = 1; ++ int bindex = 0; ++ int i = 0; ++ int j = 0; ++ struct dentry *dent1; ++ struct dentry *dent2; ++ ++ if (options[0] == '\0') { ++ printk(KERN_ERR "unionfs: no branches specified\n"); ++ err = -EINVAL; ++ goto out; ++ } ++ ++ /* ++ * Each colon means we have a separator, this is really just a rough ++ * guess, since strsep will handle empty fields for us. ++ */ ++ for (i = 0; options[i]; i++) ++ if (options[i] == ':') ++ branches++; ++ ++ /* allocate space for underlying pointers to lower dentry */ ++ UNIONFS_SB(sb)->data = ++ kcalloc(branches, sizeof(struct unionfs_data), GFP_KERNEL); ++ if (unlikely(!UNIONFS_SB(sb)->data)) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ lower_root_info->lower_paths = ++ kcalloc(branches, sizeof(struct path), GFP_KERNEL); ++ if (unlikely(!lower_root_info->lower_paths)) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ /* now parsing a string such as "b1:b2=rw:b3=ro:b4" */ ++ branches = 0; ++ while ((name = strsep(&options, ":")) != NULL) { ++ int perms; ++ char *mode = strchr(name, '='); ++ ++ if (!name) ++ continue; ++ if (!*name) { /* bad use of ':' (extra colons) */ ++ err = -EINVAL; ++ goto out; ++ } ++ ++ branches++; ++ ++ /* strip off '=' if any */ ++ if (mode) ++ *mode++ = '\0'; ++ ++ err = parse_branch_mode(mode, &perms); ++ if (err) { ++ printk(KERN_ERR "unionfs: invalid mode \"%s\" for " ++ "branch %d\n", mode, bindex); ++ goto out; ++ } ++ /* ensure that leftmost branch is writeable */ ++ if (!bindex && !(perms & MAY_WRITE)) { ++ printk(KERN_ERR "unionfs: leftmost branch cannot be " ++ "read-only (use \"-o ro\" to create a " ++ "read-only union)\n"); ++ err = -EINVAL; ++ goto out; ++ } ++ ++ err = path_lookup(name, LOOKUP_FOLLOW, &nd); ++ if (err) { ++ printk(KERN_ERR "unionfs: error accessing " ++ "lower directory '%s' (error %d)\n", ++ name, err); ++ goto out; ++ } ++ ++ err = check_branch(&nd); ++ if (err) { ++ printk(KERN_ERR "unionfs: lower directory " ++ "'%s' is not a valid branch\n", name); ++ path_put(&nd.path); ++ goto out; ++ } ++ ++ lower_root_info->lower_paths[bindex].dentry = nd.path.dentry; ++ lower_root_info->lower_paths[bindex].mnt = nd.path.mnt; ++ ++ set_branchperms(sb, bindex, perms); ++ set_branch_count(sb, bindex, 0); ++ new_branch_id(sb, bindex); ++ ++ if (lower_root_info->bstart < 0) ++ lower_root_info->bstart = bindex; ++ lower_root_info->bend = bindex; ++ bindex++; ++ } ++ ++ if (branches == 0) { ++ printk(KERN_ERR "unionfs: no branches specified\n"); ++ err = -EINVAL; ++ goto out; ++ } ++ ++ BUG_ON(branches != (lower_root_info->bend + 1)); ++ ++ /* ++ * Ensure that no overlaps exist in the branches. ++ * ++ * This test is required because the Linux kernel has no support ++ * currently for ensuring coherency between stackable layers and ++ * branches. If we were to allow overlapping branches, it would be ++ * possible, for example, to delete a file via one branch, which ++ * would not be reflected in another branch. Such incoherency could ++ * lead to inconsistencies and even kernel oopses. Rather than ++ * implement hacks to work around some of these cache-coherency ++ * problems, we prevent branch overlapping, for now. A complete ++ * solution will involve proper kernel/VFS support for cache ++ * coherency, at which time we could safely remove this ++ * branch-overlapping test. ++ */ ++ for (i = 0; i < branches; i++) { ++ dent1 = lower_root_info->lower_paths[i].dentry; ++ for (j = i + 1; j < branches; j++) { ++ dent2 = lower_root_info->lower_paths[j].dentry; ++ if (is_branch_overlap(dent1, dent2)) { ++ printk(KERN_ERR "unionfs: branches %d and " ++ "%d overlap\n", i, j); ++ err = -EINVAL; ++ goto out; ++ } ++ } ++ } ++ ++out: ++ if (err) { ++ for (i = 0; i < branches; i++) ++ path_put(&lower_root_info->lower_paths[i]); ++ ++ kfree(lower_root_info->lower_paths); ++ kfree(UNIONFS_SB(sb)->data); ++ ++ /* ++ * MUST clear the pointers to prevent potential double free if ++ * the caller dies later on ++ */ ++ lower_root_info->lower_paths = NULL; ++ UNIONFS_SB(sb)->data = NULL; ++ } ++ return err; ++} ++ ++/* ++ * Parse mount options. See the manual page for usage instructions. ++ * ++ * Returns the dentry object of the lower-level (lower) directory; ++ * We want to mount our stackable file system on top of that lower directory. ++ */ ++static struct unionfs_dentry_info *unionfs_parse_options( ++ struct super_block *sb, ++ char *options) ++{ ++ struct unionfs_dentry_info *lower_root_info; ++ char *optname; ++ int err = 0; ++ int bindex; ++ int dirsfound = 0; ++ ++ /* allocate private data area */ ++ err = -ENOMEM; ++ lower_root_info = ++ kzalloc(sizeof(struct unionfs_dentry_info), GFP_KERNEL); ++ if (unlikely(!lower_root_info)) ++ goto out_error; ++ lower_root_info->bstart = -1; ++ lower_root_info->bend = -1; ++ lower_root_info->bopaque = -1; ++ ++ while ((optname = strsep(&options, ",")) != NULL) { ++ char *optarg; ++ ++ if (!optname || !*optname) ++ continue; ++ ++ optarg = strchr(optname, '='); ++ if (optarg) ++ *optarg++ = '\0'; ++ ++ /* ++ * All of our options take an argument now. Insert ones that ++ * don't, above this check. ++ */ ++ if (!optarg) { ++ printk(KERN_ERR "unionfs: %s requires an argument\n", ++ optname); ++ err = -EINVAL; ++ goto out_error; ++ } ++ ++ if (!strcmp("dirs", optname)) { ++ if (++dirsfound > 1) { ++ printk(KERN_ERR ++ "unionfs: multiple dirs specified\n"); ++ err = -EINVAL; ++ goto out_error; ++ } ++ err = parse_dirs_option(sb, lower_root_info, optarg); ++ if (err) ++ goto out_error; ++ continue; ++ } ++ ++ err = -EINVAL; ++ printk(KERN_ERR ++ "unionfs: unrecognized option '%s'\n", optname); ++ goto out_error; ++ } ++ if (dirsfound != 1) { ++ printk(KERN_ERR "unionfs: dirs option required\n"); ++ err = -EINVAL; ++ goto out_error; ++ } ++ goto out; ++ ++out_error: ++ if (lower_root_info && lower_root_info->lower_paths) { ++ for (bindex = lower_root_info->bstart; ++ bindex >= 0 && bindex <= lower_root_info->bend; ++ bindex++) ++ path_put(&lower_root_info->lower_paths[bindex]); ++ } ++ ++ kfree(lower_root_info->lower_paths); ++ kfree(lower_root_info); ++ ++ kfree(UNIONFS_SB(sb)->data); ++ UNIONFS_SB(sb)->data = NULL; ++ ++ lower_root_info = ERR_PTR(err); ++out: ++ return lower_root_info; ++} ++ ++/* ++ * our custom d_alloc_root work-alike ++ * ++ * we can't use d_alloc_root if we want to use our own interpose function ++ * unchanged, so we simply call our own "fake" d_alloc_root ++ */ ++static struct dentry *unionfs_d_alloc_root(struct super_block *sb) ++{ ++ struct dentry *ret = NULL; ++ ++ if (sb) { ++ static const struct qstr name = { ++ .name = "/", ++ .len = 1 ++ }; ++ ++ ret = d_alloc(NULL, &name); ++ if (likely(ret)) { ++ ret->d_op = &unionfs_dops; ++ ret->d_sb = sb; ++ ret->d_parent = ret; ++ } ++ } ++ return ret; ++} ++ ++/* ++ * There is no need to lock the unionfs_super_info's rwsem as there is no ++ * way anyone can have a reference to the superblock at this point in time. ++ */ ++static int unionfs_read_super(struct super_block *sb, void *raw_data, ++ int silent) ++{ ++ int err = 0; ++ struct unionfs_dentry_info *lower_root_info = NULL; ++ int bindex, bstart, bend; ++ ++ if (!raw_data) { ++ printk(KERN_ERR ++ "unionfs: read_super: missing data argument\n"); ++ err = -EINVAL; ++ goto out; ++ } ++ ++ /* Allocate superblock private data */ ++ sb->s_fs_info = kzalloc(sizeof(struct unionfs_sb_info), GFP_KERNEL); ++ if (unlikely(!UNIONFS_SB(sb))) { ++ printk(KERN_CRIT "unionfs: read_super: out of memory\n"); ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ UNIONFS_SB(sb)->bend = -1; ++ atomic_set(&UNIONFS_SB(sb)->generation, 1); ++ init_rwsem(&UNIONFS_SB(sb)->rwsem); ++ UNIONFS_SB(sb)->high_branch_id = -1; /* -1 == invalid branch ID */ ++ ++ lower_root_info = unionfs_parse_options(sb, raw_data); ++ if (IS_ERR(lower_root_info)) { ++ printk(KERN_ERR ++ "unionfs: read_super: error while parsing options " ++ "(err = %ld)\n", PTR_ERR(lower_root_info)); ++ err = PTR_ERR(lower_root_info); ++ lower_root_info = NULL; ++ goto out_free; ++ } ++ if (lower_root_info->bstart == -1) { ++ err = -ENOENT; ++ goto out_free; ++ } ++ ++ /* set the lower superblock field of upper superblock */ ++ bstart = lower_root_info->bstart; ++ BUG_ON(bstart != 0); ++ sbend(sb) = bend = lower_root_info->bend; ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ struct dentry *d = lower_root_info->lower_paths[bindex].dentry; ++ atomic_inc(&d->d_sb->s_active); ++ unionfs_set_lower_super_idx(sb, bindex, d->d_sb); ++ } ++ ++ /* max Bytes is the maximum bytes from highest priority branch */ ++ sb->s_maxbytes = unionfs_lower_super_idx(sb, 0)->s_maxbytes; ++ ++ /* ++ * Our c/m/atime granularity is 1 ns because we may stack on file ++ * systems whose granularity is as good. This is important for our ++ * time-based cache coherency. ++ */ ++ sb->s_time_gran = 1; ++ ++ sb->s_op = &unionfs_sops; ++ ++ /* See comment next to the definition of unionfs_d_alloc_root */ ++ sb->s_root = unionfs_d_alloc_root(sb); ++ if (unlikely(!sb->s_root)) { ++ err = -ENOMEM; ++ goto out_dput; ++ } ++ ++ /* link the upper and lower dentries */ ++ sb->s_root->d_fsdata = NULL; ++ err = new_dentry_private_data(sb->s_root, UNIONFS_DMUTEX_ROOT); ++ if (unlikely(err)) ++ goto out_freedpd; ++ ++ /* Set the lower dentries for s_root */ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ struct dentry *d; ++ struct vfsmount *m; ++ ++ d = lower_root_info->lower_paths[bindex].dentry; ++ m = lower_root_info->lower_paths[bindex].mnt; ++ ++ unionfs_set_lower_dentry_idx(sb->s_root, bindex, d); ++ unionfs_set_lower_mnt_idx(sb->s_root, bindex, m); ++ } ++ dbstart(sb->s_root) = bstart; ++ dbend(sb->s_root) = bend; ++ ++ /* Set the generation number to one, since this is for the mount. */ ++ atomic_set(&UNIONFS_D(sb->s_root)->generation, 1); ++ ++ /* ++ * Call interpose to create the upper level inode. Only ++ * INTERPOSE_LOOKUP can return a value other than 0 on err. ++ */ ++ err = PTR_ERR(unionfs_interpose(sb->s_root, sb, 0)); ++ unionfs_unlock_dentry(sb->s_root); ++ if (!err) ++ goto out; ++ /* else fall through */ ++ ++out_freedpd: ++ if (UNIONFS_D(sb->s_root)) { ++ kfree(UNIONFS_D(sb->s_root)->lower_paths); ++ free_dentry_private_data(sb->s_root); ++ } ++ dput(sb->s_root); ++ ++out_dput: ++ if (lower_root_info && !IS_ERR(lower_root_info)) { ++ for (bindex = lower_root_info->bstart; ++ bindex <= lower_root_info->bend; bindex++) { ++ struct dentry *d; ++ d = lower_root_info->lower_paths[bindex].dentry; ++ /* drop refs we took earlier */ ++ atomic_dec(&d->d_sb->s_active); ++ path_put(&lower_root_info->lower_paths[bindex]); ++ } ++ kfree(lower_root_info->lower_paths); ++ kfree(lower_root_info); ++ lower_root_info = NULL; ++ } ++ ++out_free: ++ kfree(UNIONFS_SB(sb)->data); ++ kfree(UNIONFS_SB(sb)); ++ sb->s_fs_info = NULL; ++ ++out: ++ if (lower_root_info && !IS_ERR(lower_root_info)) { ++ kfree(lower_root_info->lower_paths); ++ kfree(lower_root_info); ++ } ++ return err; ++} ++ ++static int unionfs_get_sb(struct file_system_type *fs_type, ++ int flags, const char *dev_name, ++ void *raw_data, struct vfsmount *mnt) ++{ ++ int err; ++ err = get_sb_nodev(fs_type, flags, raw_data, unionfs_read_super, mnt); ++ if (!err) ++ UNIONFS_SB(mnt->mnt_sb)->dev_name = ++ kstrdup(dev_name, GFP_KERNEL); ++ return err; ++} ++ ++static struct file_system_type unionfs_fs_type = { ++ .owner = THIS_MODULE, ++ .name = UNIONFS_NAME, ++ .get_sb = unionfs_get_sb, ++ .kill_sb = generic_shutdown_super, ++ .fs_flags = FS_REVAL_DOT, ++}; ++ ++static int __init init_unionfs_fs(void) ++{ ++ int err; ++ ++ pr_info("Registering unionfs " UNIONFS_VERSION "\n"); ++ ++ err = unionfs_init_filldir_cache(); ++ if (unlikely(err)) ++ goto out; ++ err = unionfs_init_inode_cache(); ++ if (unlikely(err)) ++ goto out; ++ err = unionfs_init_dentry_cache(); ++ if (unlikely(err)) ++ goto out; ++ err = init_sioq(); ++ if (unlikely(err)) ++ goto out; ++ err = register_filesystem(&unionfs_fs_type); ++out: ++ if (unlikely(err)) { ++ stop_sioq(); ++ unionfs_destroy_filldir_cache(); ++ unionfs_destroy_inode_cache(); ++ unionfs_destroy_dentry_cache(); ++ } ++ return err; ++} ++ ++static void __exit exit_unionfs_fs(void) ++{ ++ stop_sioq(); ++ unionfs_destroy_filldir_cache(); ++ unionfs_destroy_inode_cache(); ++ unionfs_destroy_dentry_cache(); ++ unregister_filesystem(&unionfs_fs_type); ++ pr_info("Completed unionfs module unload\n"); ++} ++ ++MODULE_AUTHOR("Erez Zadok, Filesystems and Storage Lab, Stony Brook University" ++ " (http://www.fsl.cs.sunysb.edu)"); ++MODULE_DESCRIPTION("Unionfs " UNIONFS_VERSION ++ " (http://unionfs.filesystems.org)"); ++MODULE_LICENSE("GPL"); ++ ++module_init(init_unionfs_fs); ++module_exit(exit_unionfs_fs); +diff --git a/fs/unionfs/mmap.c b/fs/unionfs/mmap.c +new file mode 100644 +index 0000000..1f70535 +--- /dev/null ++++ b/fs/unionfs/mmap.c +@@ -0,0 +1,89 @@ ++/* ++ * Copyright (c) 2003-2010 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2006 Shaya Potter ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2010 Stony Brook University ++ * Copyright (c) 2003-2010 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++ ++ ++/* ++ * XXX: we need a dummy readpage handler because generic_file_mmap (which we ++ * use in unionfs_mmap) checks for the existence of ++ * mapping->a_ops->readpage, else it returns -ENOEXEC. The VFS will need to ++ * be fixed to allow a file system to define vm_ops->fault without any ++ * address_space_ops whatsoever. ++ * ++ * Otherwise, we don't want to use our readpage method at all. ++ */ ++static int unionfs_readpage(struct file *file, struct page *page) ++{ ++ BUG(); ++ return -EINVAL; ++} ++ ++static int unionfs_fault(struct vm_area_struct *vma, struct vm_fault *vmf) ++{ ++ int err; ++ struct file *file, *lower_file; ++ const struct vm_operations_struct *lower_vm_ops; ++ struct vm_area_struct lower_vma; ++ ++ BUG_ON(!vma); ++ memcpy(&lower_vma, vma, sizeof(struct vm_area_struct)); ++ file = lower_vma.vm_file; ++ lower_vm_ops = UNIONFS_F(file)->lower_vm_ops; ++ BUG_ON(!lower_vm_ops); ++ ++ lower_file = unionfs_lower_file(file); ++ BUG_ON(!lower_file); ++ /* ++ * XXX: vm_ops->fault may be called in parallel. Because we have to ++ * resort to temporarily changing the vma->vm_file to point to the ++ * lower file, a concurrent invocation of unionfs_fault could see a ++ * different value. In this workaround, we keep a different copy of ++ * the vma structure in our stack, so we never expose a different ++ * value of the vma->vm_file called to us, even temporarily. A ++ * better fix would be to change the calling semantics of ->fault to ++ * take an explicit file pointer. ++ */ ++ lower_vma.vm_file = lower_file; ++ err = lower_vm_ops->fault(&lower_vma, vmf); ++ return err; ++} ++ ++/* ++ * XXX: the default address_space_ops for unionfs is empty. We cannot set ++ * our inode->i_mapping->a_ops to NULL because too many code paths expect ++ * the a_ops vector to be non-NULL. ++ */ ++struct address_space_operations unionfs_aops = { ++ /* empty on purpose */ ++}; ++ ++/* ++ * XXX: we need a second, dummy address_space_ops vector, to be used ++ * temporarily during unionfs_mmap, because the latter calls ++ * generic_file_mmap, which checks if ->readpage exists, else returns ++ * -ENOEXEC. ++ */ ++struct address_space_operations unionfs_dummy_aops = { ++ .readpage = unionfs_readpage, ++}; ++ ++struct vm_operations_struct unionfs_vm_ops = { ++ .fault = unionfs_fault, ++}; +diff --git a/fs/unionfs/rdstate.c b/fs/unionfs/rdstate.c +new file mode 100644 +index 0000000..f745fbc +--- /dev/null ++++ b/fs/unionfs/rdstate.c +@@ -0,0 +1,285 @@ ++/* ++ * Copyright (c) 2003-2010 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2010 Stony Brook University ++ * Copyright (c) 2003-2010 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++ ++/* This file contains the routines for maintaining readdir state. */ ++ ++/* ++ * There are two structures here, rdstate which is a hash table ++ * of the second structure which is a filldir_node. ++ */ ++ ++/* ++ * This is a struct kmem_cache for filldir nodes, because we allocate a lot ++ * of them and they shouldn't waste memory. If the node has a small name ++ * (as defined by the dentry structure), then we use an inline name to ++ * preserve kmalloc space. ++ */ ++static struct kmem_cache *unionfs_filldir_cachep; ++ ++int unionfs_init_filldir_cache(void) ++{ ++ unionfs_filldir_cachep = ++ kmem_cache_create("unionfs_filldir", ++ sizeof(struct filldir_node), 0, ++ SLAB_RECLAIM_ACCOUNT, NULL); ++ ++ return (unionfs_filldir_cachep ? 0 : -ENOMEM); ++} ++ ++void unionfs_destroy_filldir_cache(void) ++{ ++ if (unionfs_filldir_cachep) ++ kmem_cache_destroy(unionfs_filldir_cachep); ++} ++ ++/* ++ * This is a tuning parameter that tells us roughly how big to make the ++ * hash table in directory entries per page. This isn't perfect, but ++ * at least we get a hash table size that shouldn't be too overloaded. ++ * The following averages are based on my home directory. ++ * 14.44693 Overall ++ * 12.29 Single Page Directories ++ * 117.93 Multi-page directories ++ */ ++#define DENTPAGE 4096 ++#define DENTPERONEPAGE 12 ++#define DENTPERPAGE 118 ++#define MINHASHSIZE 1 ++static int guesstimate_hash_size(struct inode *inode) ++{ ++ struct inode *lower_inode; ++ int bindex; ++ int hashsize = MINHASHSIZE; ++ ++ if (UNIONFS_I(inode)->hashsize > 0) ++ return UNIONFS_I(inode)->hashsize; ++ ++ for (bindex = ibstart(inode); bindex <= ibend(inode); bindex++) { ++ lower_inode = unionfs_lower_inode_idx(inode, bindex); ++ if (!lower_inode) ++ continue; ++ ++ if (i_size_read(lower_inode) == DENTPAGE) ++ hashsize += DENTPERONEPAGE; ++ else ++ hashsize += (i_size_read(lower_inode) / DENTPAGE) * ++ DENTPERPAGE; ++ } ++ ++ return hashsize; ++} ++ ++int init_rdstate(struct file *file) ++{ ++ BUG_ON(sizeof(loff_t) != ++ (sizeof(unsigned int) + sizeof(unsigned int))); ++ BUG_ON(UNIONFS_F(file)->rdstate != NULL); ++ ++ UNIONFS_F(file)->rdstate = alloc_rdstate(file->f_path.dentry->d_inode, ++ fbstart(file)); ++ ++ return (UNIONFS_F(file)->rdstate ? 0 : -ENOMEM); ++} ++ ++struct unionfs_dir_state *find_rdstate(struct inode *inode, loff_t fpos) ++{ ++ struct unionfs_dir_state *rdstate = NULL; ++ struct list_head *pos; ++ ++ spin_lock(&UNIONFS_I(inode)->rdlock); ++ list_for_each(pos, &UNIONFS_I(inode)->readdircache) { ++ struct unionfs_dir_state *r = ++ list_entry(pos, struct unionfs_dir_state, cache); ++ if (fpos == rdstate2offset(r)) { ++ UNIONFS_I(inode)->rdcount--; ++ list_del(&r->cache); ++ rdstate = r; ++ break; ++ } ++ } ++ spin_unlock(&UNIONFS_I(inode)->rdlock); ++ return rdstate; ++} ++ ++struct unionfs_dir_state *alloc_rdstate(struct inode *inode, int bindex) ++{ ++ int i = 0; ++ int hashsize; ++ unsigned long mallocsize = sizeof(struct unionfs_dir_state); ++ struct unionfs_dir_state *rdstate; ++ ++ hashsize = guesstimate_hash_size(inode); ++ mallocsize += hashsize * sizeof(struct list_head); ++ mallocsize = __roundup_pow_of_two(mallocsize); ++ ++ /* This should give us about 500 entries anyway. */ ++ if (mallocsize > PAGE_SIZE) ++ mallocsize = PAGE_SIZE; ++ ++ hashsize = (mallocsize - sizeof(struct unionfs_dir_state)) / ++ sizeof(struct list_head); ++ ++ rdstate = kmalloc(mallocsize, GFP_KERNEL); ++ if (unlikely(!rdstate)) ++ return NULL; ++ ++ spin_lock(&UNIONFS_I(inode)->rdlock); ++ if (UNIONFS_I(inode)->cookie >= (MAXRDCOOKIE - 1)) ++ UNIONFS_I(inode)->cookie = 1; ++ else ++ UNIONFS_I(inode)->cookie++; ++ ++ rdstate->cookie = UNIONFS_I(inode)->cookie; ++ spin_unlock(&UNIONFS_I(inode)->rdlock); ++ rdstate->offset = 1; ++ rdstate->access = jiffies; ++ rdstate->bindex = bindex; ++ rdstate->dirpos = 0; ++ rdstate->hashentries = 0; ++ rdstate->size = hashsize; ++ for (i = 0; i < rdstate->size; i++) ++ INIT_LIST_HEAD(&rdstate->list[i]); ++ ++ return rdstate; ++} ++ ++static void free_filldir_node(struct filldir_node *node) ++{ ++ if (node->namelen >= DNAME_INLINE_LEN_MIN) ++ kfree(node->name); ++ kmem_cache_free(unionfs_filldir_cachep, node); ++} ++ ++void free_rdstate(struct unionfs_dir_state *state) ++{ ++ struct filldir_node *tmp; ++ int i; ++ ++ for (i = 0; i < state->size; i++) { ++ struct list_head *head = &(state->list[i]); ++ struct list_head *pos, *n; ++ ++ /* traverse the list and deallocate space */ ++ list_for_each_safe(pos, n, head) { ++ tmp = list_entry(pos, struct filldir_node, file_list); ++ list_del(&tmp->file_list); ++ free_filldir_node(tmp); ++ } ++ } ++ ++ kfree(state); ++} ++ ++struct filldir_node *find_filldir_node(struct unionfs_dir_state *rdstate, ++ const char *name, int namelen, ++ int is_whiteout) ++{ ++ int index; ++ unsigned int hash; ++ struct list_head *head; ++ struct list_head *pos; ++ struct filldir_node *cursor = NULL; ++ int found = 0; ++ ++ BUG_ON(namelen <= 0); ++ ++ hash = full_name_hash(name, namelen); ++ index = hash % rdstate->size; ++ ++ head = &(rdstate->list[index]); ++ list_for_each(pos, head) { ++ cursor = list_entry(pos, struct filldir_node, file_list); ++ ++ if (cursor->namelen == namelen && cursor->hash == hash && ++ !strncmp(cursor->name, name, namelen)) { ++ /* ++ * a duplicate exists, and hence no need to create ++ * entry to the list ++ */ ++ found = 1; ++ ++ /* ++ * if a duplicate is found in this branch, and is ++ * not due to the caller looking for an entry to ++ * whiteout, then the file system may be corrupted. ++ */ ++ if (unlikely(!is_whiteout && ++ cursor->bindex == rdstate->bindex)) ++ printk(KERN_ERR "unionfs: filldir: possible " ++ "I/O error: a file is duplicated " ++ "in the same branch %d: %s\n", ++ rdstate->bindex, cursor->name); ++ break; ++ } ++ } ++ ++ if (!found) ++ cursor = NULL; ++ ++ return cursor; ++} ++ ++int add_filldir_node(struct unionfs_dir_state *rdstate, const char *name, ++ int namelen, int bindex, int whiteout) ++{ ++ struct filldir_node *new; ++ unsigned int hash; ++ int index; ++ int err = 0; ++ struct list_head *head; ++ ++ BUG_ON(namelen <= 0); ++ ++ hash = full_name_hash(name, namelen); ++ index = hash % rdstate->size; ++ head = &(rdstate->list[index]); ++ ++ new = kmem_cache_alloc(unionfs_filldir_cachep, GFP_KERNEL); ++ if (unlikely(!new)) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ INIT_LIST_HEAD(&new->file_list); ++ new->namelen = namelen; ++ new->hash = hash; ++ new->bindex = bindex; ++ new->whiteout = whiteout; ++ ++ if (namelen < DNAME_INLINE_LEN_MIN) { ++ new->name = new->iname; ++ } else { ++ new->name = kmalloc(namelen + 1, GFP_KERNEL); ++ if (unlikely(!new->name)) { ++ kmem_cache_free(unionfs_filldir_cachep, new); ++ new = NULL; ++ goto out; ++ } ++ } ++ ++ memcpy(new->name, name, namelen); ++ new->name[namelen] = '\0'; ++ ++ rdstate->hashentries++; ++ ++ list_add(&(new->file_list), head); ++out: ++ return err; ++} +diff --git a/fs/unionfs/rename.c b/fs/unionfs/rename.c +new file mode 100644 +index 0000000..936700e +--- /dev/null ++++ b/fs/unionfs/rename.c +@@ -0,0 +1,517 @@ ++/* ++ * Copyright (c) 2003-2010 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2010 Stony Brook University ++ * Copyright (c) 2003-2010 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++ ++/* ++ * This is a helper function for rename, used when rename ends up with hosed ++ * over dentries and we need to revert. ++ */ ++static int unionfs_refresh_lower_dentry(struct dentry *dentry, ++ struct dentry *parent, int bindex) ++{ ++ struct dentry *lower_dentry; ++ struct dentry *lower_parent; ++ int err = 0; ++ ++ verify_locked(dentry); ++ ++ lower_parent = unionfs_lower_dentry_idx(parent, bindex); ++ ++ BUG_ON(!S_ISDIR(lower_parent->d_inode->i_mode)); ++ ++ lower_dentry = lookup_one_len(dentry->d_name.name, lower_parent, ++ dentry->d_name.len); ++ if (IS_ERR(lower_dentry)) { ++ err = PTR_ERR(lower_dentry); ++ goto out; ++ } ++ ++ dput(unionfs_lower_dentry_idx(dentry, bindex)); ++ iput(unionfs_lower_inode_idx(dentry->d_inode, bindex)); ++ unionfs_set_lower_inode_idx(dentry->d_inode, bindex, NULL); ++ ++ if (!lower_dentry->d_inode) { ++ dput(lower_dentry); ++ unionfs_set_lower_dentry_idx(dentry, bindex, NULL); ++ } else { ++ unionfs_set_lower_dentry_idx(dentry, bindex, lower_dentry); ++ unionfs_set_lower_inode_idx(dentry->d_inode, bindex, ++ igrab(lower_dentry->d_inode)); ++ } ++ ++out: ++ return err; ++} ++ ++static int __unionfs_rename(struct inode *old_dir, struct dentry *old_dentry, ++ struct dentry *old_parent, ++ struct inode *new_dir, struct dentry *new_dentry, ++ struct dentry *new_parent, ++ int bindex) ++{ ++ int err = 0; ++ struct dentry *lower_old_dentry; ++ struct dentry *lower_new_dentry; ++ struct dentry *lower_old_dir_dentry; ++ struct dentry *lower_new_dir_dentry; ++ struct dentry *trap; ++ ++ lower_new_dentry = unionfs_lower_dentry_idx(new_dentry, bindex); ++ lower_old_dentry = unionfs_lower_dentry_idx(old_dentry, bindex); ++ ++ if (!lower_new_dentry) { ++ lower_new_dentry = ++ create_parents(new_parent->d_inode, ++ new_dentry, new_dentry->d_name.name, ++ bindex); ++ if (IS_ERR(lower_new_dentry)) { ++ err = PTR_ERR(lower_new_dentry); ++ if (IS_COPYUP_ERR(err)) ++ goto out; ++ printk(KERN_ERR "unionfs: error creating directory " ++ "tree for rename, bindex=%d err=%d\n", ++ bindex, err); ++ goto out; ++ } ++ } ++ ++ /* check for and remove whiteout, if any */ ++ err = check_unlink_whiteout(new_dentry, lower_new_dentry, bindex); ++ if (err > 0) /* ignore if whiteout found and successfully removed */ ++ err = 0; ++ if (err) ++ goto out; ++ ++ /* check of old_dentry branch is writable */ ++ err = is_robranch_super(old_dentry->d_sb, bindex); ++ if (err) ++ goto out; ++ ++ dget(lower_old_dentry); ++ dget(lower_new_dentry); ++ lower_old_dir_dentry = dget_parent(lower_old_dentry); ++ lower_new_dir_dentry = dget_parent(lower_new_dentry); ++ ++ trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry); ++ /* source should not be ancenstor of target */ ++ if (trap == lower_old_dentry) { ++ err = -EINVAL; ++ goto out_err_unlock; ++ } ++ /* target should not be ancenstor of source */ ++ if (trap == lower_new_dentry) { ++ err = -ENOTEMPTY; ++ goto out_err_unlock; ++ } ++ err = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry, ++ lower_new_dir_dentry->d_inode, lower_new_dentry); ++out_err_unlock: ++ if (!err) { ++ /* update parent dir times */ ++ fsstack_copy_attr_times(old_dir, lower_old_dir_dentry->d_inode); ++ fsstack_copy_attr_times(new_dir, lower_new_dir_dentry->d_inode); ++ } ++ unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry); ++ ++ dput(lower_old_dir_dentry); ++ dput(lower_new_dir_dentry); ++ dput(lower_old_dentry); ++ dput(lower_new_dentry); ++ ++out: ++ if (!err) { ++ /* Fixup the new_dentry. */ ++ if (bindex < dbstart(new_dentry)) ++ dbstart(new_dentry) = bindex; ++ else if (bindex > dbend(new_dentry)) ++ dbend(new_dentry) = bindex; ++ } ++ ++ return err; ++} ++ ++/* ++ * Main rename code. This is sufficiently complex, that it's documented in ++ * Documentation/filesystems/unionfs/rename.txt. This routine calls ++ * __unionfs_rename() above to perform some of the work. ++ */ ++static int do_unionfs_rename(struct inode *old_dir, ++ struct dentry *old_dentry, ++ struct dentry *old_parent, ++ struct inode *new_dir, ++ struct dentry *new_dentry, ++ struct dentry *new_parent) ++{ ++ int err = 0; ++ int bindex; ++ int old_bstart, old_bend; ++ int new_bstart, new_bend; ++ int do_copyup = -1; ++ int local_err = 0; ++ int eio = 0; ++ int revert = 0; ++ ++ old_bstart = dbstart(old_dentry); ++ old_bend = dbend(old_dentry); ++ ++ new_bstart = dbstart(new_dentry); ++ new_bend = dbend(new_dentry); ++ ++ /* Rename source to destination. */ ++ err = __unionfs_rename(old_dir, old_dentry, old_parent, ++ new_dir, new_dentry, new_parent, ++ old_bstart); ++ if (err) { ++ if (!IS_COPYUP_ERR(err)) ++ goto out; ++ do_copyup = old_bstart - 1; ++ } else { ++ revert = 1; ++ } ++ ++ /* ++ * Unlink all instances of destination that exist to the left of ++ * bstart of source. On error, revert back, goto out. ++ */ ++ for (bindex = old_bstart - 1; bindex >= new_bstart; bindex--) { ++ struct dentry *unlink_dentry; ++ struct dentry *unlink_dir_dentry; ++ ++ BUG_ON(bindex < 0); ++ unlink_dentry = unionfs_lower_dentry_idx(new_dentry, bindex); ++ if (!unlink_dentry) ++ continue; ++ ++ unlink_dir_dentry = lock_parent(unlink_dentry); ++ err = is_robranch_super(old_dir->i_sb, bindex); ++ if (!err) ++ err = vfs_unlink(unlink_dir_dentry->d_inode, ++ unlink_dentry); ++ ++ fsstack_copy_attr_times(new_parent->d_inode, ++ unlink_dir_dentry->d_inode); ++ /* propagate number of hard-links */ ++ new_parent->d_inode->i_nlink = ++ unionfs_get_nlinks(new_parent->d_inode); ++ ++ unlock_dir(unlink_dir_dentry); ++ if (!err) { ++ if (bindex != new_bstart) { ++ dput(unlink_dentry); ++ unionfs_set_lower_dentry_idx(new_dentry, ++ bindex, NULL); ++ } ++ } else if (IS_COPYUP_ERR(err)) { ++ do_copyup = bindex - 1; ++ } else if (revert) { ++ goto revert; ++ } ++ } ++ ++ if (do_copyup != -1) { ++ for (bindex = do_copyup; bindex >= 0; bindex--) { ++ /* ++ * copyup the file into some left directory, so that ++ * you can rename it ++ */ ++ err = copyup_dentry(old_parent->d_inode, ++ old_dentry, old_bstart, bindex, ++ old_dentry->d_name.name, ++ old_dentry->d_name.len, NULL, ++ i_size_read(old_dentry->d_inode)); ++ /* if copyup failed, try next branch to the left */ ++ if (err) ++ continue; ++ /* ++ * create whiteout before calling __unionfs_rename ++ * because the latter will change the old_dentry's ++ * lower name and parent dir, resulting in the ++ * whiteout getting created in the wrong dir. ++ */ ++ err = create_whiteout(old_dentry, bindex); ++ if (err) { ++ printk(KERN_ERR "unionfs: can't create a " ++ "whiteout for %s in rename (err=%d)\n", ++ old_dentry->d_name.name, err); ++ continue; ++ } ++ err = __unionfs_rename(old_dir, old_dentry, old_parent, ++ new_dir, new_dentry, new_parent, ++ bindex); ++ break; ++ } ++ } ++ ++ /* make it opaque */ ++ if (S_ISDIR(old_dentry->d_inode->i_mode)) { ++ err = make_dir_opaque(old_dentry, dbstart(old_dentry)); ++ if (err) ++ goto revert; ++ } ++ ++ /* ++ * Create whiteout for source, only if: ++ * (1) There is more than one underlying instance of source. ++ * (We did a copy_up is taken care of above). ++ */ ++ if ((old_bstart != old_bend) && (do_copyup == -1)) { ++ err = create_whiteout(old_dentry, old_bstart); ++ if (err) { ++ /* can't fix anything now, so we exit with -EIO */ ++ printk(KERN_ERR "unionfs: can't create a whiteout for " ++ "%s in rename!\n", old_dentry->d_name.name); ++ err = -EIO; ++ } ++ } ++ ++out: ++ return err; ++ ++revert: ++ /* Do revert here. */ ++ local_err = unionfs_refresh_lower_dentry(new_dentry, new_parent, ++ old_bstart); ++ if (local_err) { ++ printk(KERN_ERR "unionfs: revert failed in rename: " ++ "the new refresh failed\n"); ++ eio = -EIO; ++ } ++ ++ local_err = unionfs_refresh_lower_dentry(old_dentry, old_parent, ++ old_bstart); ++ if (local_err) { ++ printk(KERN_ERR "unionfs: revert failed in rename: " ++ "the old refresh failed\n"); ++ eio = -EIO; ++ goto revert_out; ++ } ++ ++ if (!unionfs_lower_dentry_idx(new_dentry, bindex) || ++ !unionfs_lower_dentry_idx(new_dentry, bindex)->d_inode) { ++ printk(KERN_ERR "unionfs: revert failed in rename: " ++ "the object disappeared from under us!\n"); ++ eio = -EIO; ++ goto revert_out; ++ } ++ ++ if (unionfs_lower_dentry_idx(old_dentry, bindex) && ++ unionfs_lower_dentry_idx(old_dentry, bindex)->d_inode) { ++ printk(KERN_ERR "unionfs: revert failed in rename: " ++ "the object was created underneath us!\n"); ++ eio = -EIO; ++ goto revert_out; ++ } ++ ++ local_err = __unionfs_rename(new_dir, new_dentry, new_parent, ++ old_dir, old_dentry, old_parent, ++ old_bstart); ++ ++ /* If we can't fix it, then we cop-out with -EIO. */ ++ if (local_err) { ++ printk(KERN_ERR "unionfs: revert failed in rename!\n"); ++ eio = -EIO; ++ } ++ ++ local_err = unionfs_refresh_lower_dentry(new_dentry, new_parent, ++ bindex); ++ if (local_err) ++ eio = -EIO; ++ local_err = unionfs_refresh_lower_dentry(old_dentry, old_parent, ++ bindex); ++ if (local_err) ++ eio = -EIO; ++ ++revert_out: ++ if (eio) ++ err = eio; ++ return err; ++} ++ ++/* ++ * We can't copyup a directory, because it may involve huge numbers of ++ * children, etc. Doing that in the kernel would be bad, so instead we ++ * return EXDEV to the user-space utility that caused this, and let the ++ * user-space recurse and ask us to copy up each file separately. ++ */ ++static int may_rename_dir(struct dentry *dentry, struct dentry *parent) ++{ ++ int err, bstart; ++ ++ err = check_empty(dentry, parent, NULL); ++ if (err == -ENOTEMPTY) { ++ if (is_robranch(dentry)) ++ return -EXDEV; ++ } else if (err) { ++ return err; ++ } ++ ++ bstart = dbstart(dentry); ++ if (dbend(dentry) == bstart || dbopaque(dentry) == bstart) ++ return 0; ++ ++ dbstart(dentry) = bstart + 1; ++ err = check_empty(dentry, parent, NULL); ++ dbstart(dentry) = bstart; ++ if (err == -ENOTEMPTY) ++ err = -EXDEV; ++ return err; ++} ++ ++/* ++ * The locking rules in unionfs_rename are complex. We could use a simpler ++ * superblock-level name-space lock for renames and copy-ups. ++ */ ++int unionfs_rename(struct inode *old_dir, struct dentry *old_dentry, ++ struct inode *new_dir, struct dentry *new_dentry) ++{ ++ int err = 0; ++ struct dentry *wh_dentry; ++ struct dentry *old_parent, *new_parent; ++ int valid = true; ++ ++ unionfs_read_lock(old_dentry->d_sb, UNIONFS_SMUTEX_CHILD); ++ old_parent = dget_parent(old_dentry); ++ new_parent = dget_parent(new_dentry); ++ /* un/lock parent dentries only if they differ from old/new_dentry */ ++ if (old_parent != old_dentry && ++ old_parent != new_dentry) ++ unionfs_lock_dentry(old_parent, UNIONFS_DMUTEX_REVAL_PARENT); ++ if (new_parent != old_dentry && ++ new_parent != new_dentry && ++ new_parent != old_parent) ++ unionfs_lock_dentry(new_parent, UNIONFS_DMUTEX_REVAL_CHILD); ++ unionfs_double_lock_dentry(old_dentry, new_dentry); ++ ++ valid = __unionfs_d_revalidate(old_dentry, old_parent, false); ++ if (!valid) { ++ err = -ESTALE; ++ goto out; ++ } ++ if (!d_deleted(new_dentry) && new_dentry->d_inode) { ++ valid = __unionfs_d_revalidate(new_dentry, new_parent, false); ++ if (!valid) { ++ err = -ESTALE; ++ goto out; ++ } ++ } ++ ++ if (!S_ISDIR(old_dentry->d_inode->i_mode)) ++ err = unionfs_partial_lookup(old_dentry, old_parent); ++ else ++ err = may_rename_dir(old_dentry, old_parent); ++ ++ if (err) ++ goto out; ++ ++ err = unionfs_partial_lookup(new_dentry, new_parent); ++ if (err) ++ goto out; ++ ++ /* ++ * if new_dentry is already lower because of whiteout, ++ * simply override it even if the whited-out dir is not empty. ++ */ ++ wh_dentry = find_first_whiteout(new_dentry); ++ if (!IS_ERR(wh_dentry)) { ++ dput(wh_dentry); ++ } else if (new_dentry->d_inode) { ++ if (S_ISDIR(old_dentry->d_inode->i_mode) != ++ S_ISDIR(new_dentry->d_inode->i_mode)) { ++ err = S_ISDIR(old_dentry->d_inode->i_mode) ? ++ -ENOTDIR : -EISDIR; ++ goto out; ++ } ++ ++ if (S_ISDIR(new_dentry->d_inode->i_mode)) { ++ struct unionfs_dir_state *namelist = NULL; ++ /* check if this unionfs directory is empty or not */ ++ err = check_empty(new_dentry, new_parent, &namelist); ++ if (err) ++ goto out; ++ ++ if (!is_robranch(new_dentry)) ++ err = delete_whiteouts(new_dentry, ++ dbstart(new_dentry), ++ namelist); ++ ++ free_rdstate(namelist); ++ ++ if (err) ++ goto out; ++ } ++ } ++ ++ err = do_unionfs_rename(old_dir, old_dentry, old_parent, ++ new_dir, new_dentry, new_parent); ++ if (err) ++ goto out; ++ ++ /* ++ * force re-lookup since the dir on ro branch is not renamed, and ++ * lower dentries still indicate the un-renamed ones. ++ */ ++ if (S_ISDIR(old_dentry->d_inode->i_mode)) ++ atomic_dec(&UNIONFS_D(old_dentry)->generation); ++ else ++ unionfs_postcopyup_release(old_dentry); ++ if (new_dentry->d_inode && !S_ISDIR(new_dentry->d_inode->i_mode)) { ++ unionfs_postcopyup_release(new_dentry); ++ unionfs_postcopyup_setmnt(new_dentry); ++ if (!unionfs_lower_inode(new_dentry->d_inode)) { ++ /* ++ * If we get here, it means that no copyup was ++ * needed, and that a file by the old name already ++ * existing on the destination branch; that file got ++ * renamed earlier in this function, so all we need ++ * to do here is set the lower inode. ++ */ ++ struct inode *inode; ++ inode = unionfs_lower_inode(old_dentry->d_inode); ++ igrab(inode); ++ unionfs_set_lower_inode_idx(new_dentry->d_inode, ++ dbstart(new_dentry), ++ inode); ++ } ++ } ++ /* if all of this renaming succeeded, update our times */ ++ unionfs_copy_attr_times(old_dentry->d_inode); ++ unionfs_copy_attr_times(new_dentry->d_inode); ++ unionfs_check_inode(old_dir); ++ unionfs_check_inode(new_dir); ++ unionfs_check_dentry(old_dentry); ++ unionfs_check_dentry(new_dentry); ++ ++out: ++ if (err) /* clear the new_dentry stuff created */ ++ d_drop(new_dentry); ++ ++ unionfs_double_unlock_dentry(old_dentry, new_dentry); ++ if (new_parent != old_dentry && ++ new_parent != new_dentry && ++ new_parent != old_parent) ++ unionfs_unlock_dentry(new_parent); ++ if (old_parent != old_dentry && ++ old_parent != new_dentry) ++ unionfs_unlock_dentry(old_parent); ++ dput(new_parent); ++ dput(old_parent); ++ unionfs_read_unlock(old_dentry->d_sb); ++ ++ return err; ++} +diff --git a/fs/unionfs/sioq.c b/fs/unionfs/sioq.c +new file mode 100644 +index 0000000..760c580 +--- /dev/null ++++ b/fs/unionfs/sioq.c +@@ -0,0 +1,101 @@ ++/* ++ * Copyright (c) 2006-2010 Erez Zadok ++ * Copyright (c) 2006 Charles P. Wright ++ * Copyright (c) 2006-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2006 Junjiro Okajima ++ * Copyright (c) 2006 David P. Quigley ++ * Copyright (c) 2006-2010 Stony Brook University ++ * Copyright (c) 2006-2010 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++ ++/* ++ * Super-user IO work Queue - sometimes we need to perform actions which ++ * would fail due to the unix permissions on the parent directory (e.g., ++ * rmdir a directory which appears empty, but in reality contains ++ * whiteouts). ++ */ ++ ++static struct workqueue_struct *superio_workqueue; ++ ++int __init init_sioq(void) ++{ ++ int err; ++ ++ superio_workqueue = create_workqueue("unionfs_siod"); ++ if (!IS_ERR(superio_workqueue)) ++ return 0; ++ ++ err = PTR_ERR(superio_workqueue); ++ printk(KERN_ERR "unionfs: create_workqueue failed %d\n", err); ++ superio_workqueue = NULL; ++ return err; ++} ++ ++void stop_sioq(void) ++{ ++ if (superio_workqueue) ++ destroy_workqueue(superio_workqueue); ++} ++ ++void run_sioq(work_func_t func, struct sioq_args *args) ++{ ++ INIT_WORK(&args->work, func); ++ ++ init_completion(&args->comp); ++ while (!queue_work(superio_workqueue, &args->work)) { ++ /* TODO: do accounting if needed */ ++ schedule(); ++ } ++ wait_for_completion(&args->comp); ++} ++ ++void __unionfs_create(struct work_struct *work) ++{ ++ struct sioq_args *args = container_of(work, struct sioq_args, work); ++ struct create_args *c = &args->create; ++ ++ args->err = vfs_create(c->parent, c->dentry, c->mode, c->nd); ++ complete(&args->comp); ++} ++ ++void __unionfs_mkdir(struct work_struct *work) ++{ ++ struct sioq_args *args = container_of(work, struct sioq_args, work); ++ struct mkdir_args *m = &args->mkdir; ++ ++ args->err = vfs_mkdir(m->parent, m->dentry, m->mode); ++ complete(&args->comp); ++} ++ ++void __unionfs_mknod(struct work_struct *work) ++{ ++ struct sioq_args *args = container_of(work, struct sioq_args, work); ++ struct mknod_args *m = &args->mknod; ++ ++ args->err = vfs_mknod(m->parent, m->dentry, m->mode, m->dev); ++ complete(&args->comp); ++} ++ ++void __unionfs_symlink(struct work_struct *work) ++{ ++ struct sioq_args *args = container_of(work, struct sioq_args, work); ++ struct symlink_args *s = &args->symlink; ++ ++ args->err = vfs_symlink(s->parent, s->dentry, s->symbuf); ++ complete(&args->comp); ++} ++ ++void __unionfs_unlink(struct work_struct *work) ++{ ++ struct sioq_args *args = container_of(work, struct sioq_args, work); ++ struct unlink_args *u = &args->unlink; ++ ++ args->err = vfs_unlink(u->parent, u->dentry); ++ complete(&args->comp); ++} +diff --git a/fs/unionfs/sioq.h b/fs/unionfs/sioq.h +new file mode 100644 +index 0000000..b26d248 +--- /dev/null ++++ b/fs/unionfs/sioq.h +@@ -0,0 +1,91 @@ ++/* ++ * Copyright (c) 2006-2010 Erez Zadok ++ * Copyright (c) 2006 Charles P. Wright ++ * Copyright (c) 2006-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2006 Junjiro Okajima ++ * Copyright (c) 2006 David P. Quigley ++ * Copyright (c) 2006-2010 Stony Brook University ++ * Copyright (c) 2006-2010 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#ifndef _SIOQ_H ++#define _SIOQ_H ++ ++struct deletewh_args { ++ struct unionfs_dir_state *namelist; ++ struct dentry *dentry; ++ int bindex; ++}; ++ ++struct is_opaque_args { ++ struct dentry *dentry; ++}; ++ ++struct create_args { ++ struct inode *parent; ++ struct dentry *dentry; ++ umode_t mode; ++ struct nameidata *nd; ++}; ++ ++struct mkdir_args { ++ struct inode *parent; ++ struct dentry *dentry; ++ umode_t mode; ++}; ++ ++struct mknod_args { ++ struct inode *parent; ++ struct dentry *dentry; ++ umode_t mode; ++ dev_t dev; ++}; ++ ++struct symlink_args { ++ struct inode *parent; ++ struct dentry *dentry; ++ char *symbuf; ++}; ++ ++struct unlink_args { ++ struct inode *parent; ++ struct dentry *dentry; ++}; ++ ++ ++struct sioq_args { ++ struct completion comp; ++ struct work_struct work; ++ int err; ++ void *ret; ++ ++ union { ++ struct deletewh_args deletewh; ++ struct is_opaque_args is_opaque; ++ struct create_args create; ++ struct mkdir_args mkdir; ++ struct mknod_args mknod; ++ struct symlink_args symlink; ++ struct unlink_args unlink; ++ }; ++}; ++ ++/* Extern definitions for SIOQ functions */ ++extern int __init init_sioq(void); ++extern void stop_sioq(void); ++extern void run_sioq(work_func_t func, struct sioq_args *args); ++ ++/* Extern definitions for our privilege escalation helpers */ ++extern void __unionfs_create(struct work_struct *work); ++extern void __unionfs_mkdir(struct work_struct *work); ++extern void __unionfs_mknod(struct work_struct *work); ++extern void __unionfs_symlink(struct work_struct *work); ++extern void __unionfs_unlink(struct work_struct *work); ++extern void __delete_whiteouts(struct work_struct *work); ++extern void __is_opaque_dir(struct work_struct *work); ++ ++#endif /* not _SIOQ_H */ +diff --git a/fs/unionfs/subr.c b/fs/unionfs/subr.c +new file mode 100644 +index 0000000..570a344 +--- /dev/null ++++ b/fs/unionfs/subr.c +@@ -0,0 +1,95 @@ ++/* ++ * Copyright (c) 2003-2010 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2010 Stony Brook University ++ * Copyright (c) 2003-2010 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++ ++/* ++ * returns the right n_link value based on the inode type ++ */ ++int unionfs_get_nlinks(const struct inode *inode) ++{ ++ /* don't bother to do all the work since we're unlinked */ ++ if (inode->i_nlink == 0) ++ return 0; ++ ++ if (!S_ISDIR(inode->i_mode)) ++ return unionfs_lower_inode(inode)->i_nlink; ++ ++ /* ++ * For directories, we return 1. The only place that could cares ++ * about links is readdir, and there's d_type there so even that ++ * doesn't matter. ++ */ ++ return 1; ++} ++ ++/* copy a/m/ctime from the lower branch with the newest times */ ++void unionfs_copy_attr_times(struct inode *upper) ++{ ++ int bindex; ++ struct inode *lower; ++ ++ if (!upper) ++ return; ++ if (ibstart(upper) < 0) { ++#ifdef CONFIG_UNION_FS_DEBUG ++ WARN_ON(ibstart(upper) < 0); ++#endif /* CONFIG_UNION_FS_DEBUG */ ++ return; ++ } ++ for (bindex = ibstart(upper); bindex <= ibend(upper); bindex++) { ++ lower = unionfs_lower_inode_idx(upper, bindex); ++ if (!lower) ++ continue; /* not all lower dir objects may exist */ ++ if (unlikely(timespec_compare(&upper->i_mtime, ++ &lower->i_mtime) < 0)) ++ upper->i_mtime = lower->i_mtime; ++ if (unlikely(timespec_compare(&upper->i_ctime, ++ &lower->i_ctime) < 0)) ++ upper->i_ctime = lower->i_ctime; ++ if (unlikely(timespec_compare(&upper->i_atime, ++ &lower->i_atime) < 0)) ++ upper->i_atime = lower->i_atime; ++ } ++} ++ ++/* ++ * A unionfs/fanout version of fsstack_copy_attr_all. Uses a ++ * unionfs_get_nlinks to properly calcluate the number of links to a file. ++ * Also, copies the max() of all a/m/ctimes for all lower inodes (which is ++ * important if the lower inode is a directory type) ++ */ ++void unionfs_copy_attr_all(struct inode *dest, ++ const struct inode *src) ++{ ++ dest->i_mode = src->i_mode; ++ dest->i_uid = src->i_uid; ++ dest->i_gid = src->i_gid; ++ dest->i_rdev = src->i_rdev; ++ ++ unionfs_copy_attr_times(dest); ++ ++ dest->i_blkbits = src->i_blkbits; ++ dest->i_flags = src->i_flags; ++ ++ /* ++ * Update the nlinks AFTER updating the above fields, because the ++ * get_links callback may depend on them. ++ */ ++ dest->i_nlink = unionfs_get_nlinks(dest); ++} +diff --git a/fs/unionfs/super.c b/fs/unionfs/super.c +new file mode 100644 +index 0000000..45bb9bf +--- /dev/null ++++ b/fs/unionfs/super.c +@@ -0,0 +1,1029 @@ ++/* ++ * Copyright (c) 2003-2010 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2010 Stony Brook University ++ * Copyright (c) 2003-2010 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++ ++/* ++ * The inode cache is used with alloc_inode for both our inode info and the ++ * vfs inode. ++ */ ++static struct kmem_cache *unionfs_inode_cachep; ++ ++struct inode *unionfs_iget(struct super_block *sb, unsigned long ino) ++{ ++ int size; ++ struct unionfs_inode_info *info; ++ struct inode *inode; ++ ++ inode = iget_locked(sb, ino); ++ if (!inode) ++ return ERR_PTR(-ENOMEM); ++ if (!(inode->i_state & I_NEW)) ++ return inode; ++ ++ info = UNIONFS_I(inode); ++ memset(info, 0, offsetof(struct unionfs_inode_info, vfs_inode)); ++ info->bstart = -1; ++ info->bend = -1; ++ atomic_set(&info->generation, ++ atomic_read(&UNIONFS_SB(inode->i_sb)->generation)); ++ spin_lock_init(&info->rdlock); ++ info->rdcount = 1; ++ info->hashsize = -1; ++ INIT_LIST_HEAD(&info->readdircache); ++ ++ size = sbmax(inode->i_sb) * sizeof(struct inode *); ++ info->lower_inodes = kzalloc(size, GFP_KERNEL); ++ if (unlikely(!info->lower_inodes)) { ++ printk(KERN_CRIT "unionfs: no kernel memory when allocating " ++ "lower-pointer array!\n"); ++ iget_failed(inode); ++ return ERR_PTR(-ENOMEM); ++ } ++ ++ inode->i_version++; ++ inode->i_op = &unionfs_main_iops; ++ inode->i_fop = &unionfs_main_fops; ++ ++ inode->i_mapping->a_ops = &unionfs_aops; ++ ++ /* ++ * reset times so unionfs_copy_attr_all can keep out time invariants ++ * right (upper inode time being the max of all lower ones). ++ */ ++ inode->i_atime.tv_sec = inode->i_atime.tv_nsec = 0; ++ inode->i_mtime.tv_sec = inode->i_mtime.tv_nsec = 0; ++ inode->i_ctime.tv_sec = inode->i_ctime.tv_nsec = 0; ++ unlock_new_inode(inode); ++ return inode; ++} ++ ++/* ++ * final actions when unmounting a file system ++ * ++ * No need to lock rwsem. ++ */ ++static void unionfs_put_super(struct super_block *sb) ++{ ++ int bindex, bstart, bend; ++ struct unionfs_sb_info *spd; ++ int leaks = 0; ++ ++ spd = UNIONFS_SB(sb); ++ if (!spd) ++ return; ++ ++ bstart = sbstart(sb); ++ bend = sbend(sb); ++ ++ /* Make sure we have no leaks of branchget/branchput. */ ++ for (bindex = bstart; bindex <= bend; bindex++) ++ if (unlikely(branch_count(sb, bindex) != 0)) { ++ printk(KERN_CRIT ++ "unionfs: branch %d has %d references left!\n", ++ bindex, branch_count(sb, bindex)); ++ leaks = 1; ++ } ++ WARN_ON(leaks != 0); ++ ++ /* decrement lower super references */ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ struct super_block *s; ++ s = unionfs_lower_super_idx(sb, bindex); ++ unionfs_set_lower_super_idx(sb, bindex, NULL); ++ atomic_dec(&s->s_active); ++ } ++ ++ kfree(spd->dev_name); ++ kfree(spd->data); ++ kfree(spd); ++ sb->s_fs_info = NULL; ++} ++ ++/* ++ * Since people use this to answer the "How big of a file can I write?" ++ * question, we report the size of the highest priority branch as the size of ++ * the union. ++ */ ++static int unionfs_statfs(struct dentry *dentry, struct kstatfs *buf) ++{ ++ int err = 0; ++ struct super_block *sb; ++ struct dentry *lower_dentry; ++ struct dentry *parent; ++ struct path lower_path; ++ bool valid; ++ ++ sb = dentry->d_sb; ++ ++ unionfs_read_lock(sb, UNIONFS_SMUTEX_CHILD); ++ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ valid = __unionfs_d_revalidate(dentry, parent, false); ++ if (unlikely(!valid)) { ++ err = -ESTALE; ++ goto out; ++ } ++ unionfs_check_dentry(dentry); ++ ++ lower_dentry = unionfs_lower_dentry(sb->s_root); ++ lower_path.dentry = lower_dentry; ++ lower_path.mnt = unionfs_mntget(sb->s_root, 0); ++ err = vfs_statfs(&lower_path, buf); ++ mntput(lower_path.mnt); ++ ++ /* set return buf to our f/s to avoid confusing user-level utils */ ++ buf->f_type = UNIONFS_SUPER_MAGIC; ++ /* ++ * Our maximum file name can is shorter by a few bytes because every ++ * file name could potentially be whited-out. ++ * ++ * XXX: this restriction goes away with ODF. ++ */ ++ unionfs_set_max_namelen(&buf->f_namelen); ++ ++ /* ++ * reset two fields to avoid confusing user-land. ++ * XXX: is this still necessary? ++ */ ++ memset(&buf->f_fsid, 0, sizeof(__kernel_fsid_t)); ++ memset(&buf->f_spare, 0, sizeof(buf->f_spare)); ++ ++out: ++ unionfs_check_dentry(dentry); ++ unionfs_unlock_dentry(dentry); ++ unionfs_unlock_parent(dentry, parent); ++ unionfs_read_unlock(sb); ++ return err; ++} ++ ++/* handle mode changing during remount */ ++static noinline_for_stack int do_remount_mode_option( ++ char *optarg, ++ int cur_branches, ++ struct unionfs_data *new_data, ++ struct path *new_lower_paths) ++{ ++ int err = -EINVAL; ++ int perms, idx; ++ char *modename = strchr(optarg, '='); ++ struct nameidata nd; ++ ++ /* by now, optarg contains the branch name */ ++ if (!*optarg) { ++ printk(KERN_ERR ++ "unionfs: no branch specified for mode change\n"); ++ goto out; ++ } ++ if (!modename) { ++ printk(KERN_ERR "unionfs: branch \"%s\" requires a mode\n", ++ optarg); ++ goto out; ++ } ++ *modename++ = '\0'; ++ err = parse_branch_mode(modename, &perms); ++ if (err) { ++ printk(KERN_ERR "unionfs: invalid mode \"%s\" for \"%s\"\n", ++ modename, optarg); ++ goto out; ++ } ++ ++ /* ++ * Find matching branch index. For now, this assumes that nothing ++ * has been mounted on top of this Unionfs stack. Once we have /odf ++ * and cache-coherency resolved, we'll address the branch-path ++ * uniqueness. ++ */ ++ err = path_lookup(optarg, LOOKUP_FOLLOW, &nd); ++ if (err) { ++ printk(KERN_ERR "unionfs: error accessing " ++ "lower directory \"%s\" (error %d)\n", ++ optarg, err); ++ goto out; ++ } ++ for (idx = 0; idx < cur_branches; idx++) ++ if (nd.path.mnt == new_lower_paths[idx].mnt && ++ nd.path.dentry == new_lower_paths[idx].dentry) ++ break; ++ path_put(&nd.path); /* no longer needed */ ++ if (idx == cur_branches) { ++ err = -ENOENT; /* err may have been reset above */ ++ printk(KERN_ERR "unionfs: branch \"%s\" " ++ "not found\n", optarg); ++ goto out; ++ } ++ /* check/change mode for existing branch */ ++ /* we don't warn if perms==branchperms */ ++ new_data[idx].branchperms = perms; ++ err = 0; ++out: ++ return err; ++} ++ ++/* handle branch deletion during remount */ ++static noinline_for_stack int do_remount_del_option( ++ char *optarg, int cur_branches, ++ struct unionfs_data *new_data, ++ struct path *new_lower_paths) ++{ ++ int err = -EINVAL; ++ int idx; ++ struct nameidata nd; ++ ++ /* optarg contains the branch name to delete */ ++ ++ /* ++ * Find matching branch index. For now, this assumes that nothing ++ * has been mounted on top of this Unionfs stack. Once we have /odf ++ * and cache-coherency resolved, we'll address the branch-path ++ * uniqueness. ++ */ ++ err = path_lookup(optarg, LOOKUP_FOLLOW, &nd); ++ if (err) { ++ printk(KERN_ERR "unionfs: error accessing " ++ "lower directory \"%s\" (error %d)\n", ++ optarg, err); ++ goto out; ++ } ++ for (idx = 0; idx < cur_branches; idx++) ++ if (nd.path.mnt == new_lower_paths[idx].mnt && ++ nd.path.dentry == new_lower_paths[idx].dentry) ++ break; ++ path_put(&nd.path); /* no longer needed */ ++ if (idx == cur_branches) { ++ printk(KERN_ERR "unionfs: branch \"%s\" " ++ "not found\n", optarg); ++ err = -ENOENT; ++ goto out; ++ } ++ /* check if there are any open files on the branch to be deleted */ ++ if (atomic_read(&new_data[idx].open_files) > 0) { ++ err = -EBUSY; ++ goto out; ++ } ++ ++ /* ++ * Now we have to delete the branch. First, release any handles it ++ * has. Then, move the remaining array indexes past "idx" in ++ * new_data and new_lower_paths one to the left. Finally, adjust ++ * cur_branches. ++ */ ++ path_put(&new_lower_paths[idx]); ++ ++ if (idx < cur_branches - 1) { ++ /* if idx==cur_branches-1, we delete last branch: easy */ ++ memmove(&new_data[idx], &new_data[idx+1], ++ (cur_branches - 1 - idx) * ++ sizeof(struct unionfs_data)); ++ memmove(&new_lower_paths[idx], &new_lower_paths[idx+1], ++ (cur_branches - 1 - idx) * sizeof(struct path)); ++ } ++ ++ err = 0; ++out: ++ return err; ++} ++ ++/* handle branch insertion during remount */ ++static noinline_for_stack int do_remount_add_option( ++ char *optarg, int cur_branches, ++ struct unionfs_data *new_data, ++ struct path *new_lower_paths, ++ int *high_branch_id) ++{ ++ int err = -EINVAL; ++ int perms; ++ int idx = 0; /* default: insert at beginning */ ++ char *new_branch , *modename = NULL; ++ struct nameidata nd; ++ ++ /* ++ * optarg can be of several forms: ++ * ++ * /bar:/foo insert /foo before /bar ++ * /bar:/foo=ro insert /foo in ro mode before /bar ++ * /foo insert /foo in the beginning (prepend) ++ * :/foo insert /foo at the end (append) ++ */ ++ if (*optarg == ':') { /* append? */ ++ new_branch = optarg + 1; /* skip ':' */ ++ idx = cur_branches; ++ goto found_insertion_point; ++ } ++ new_branch = strchr(optarg, ':'); ++ if (!new_branch) { /* prepend? */ ++ new_branch = optarg; ++ goto found_insertion_point; ++ } ++ *new_branch++ = '\0'; /* holds path+mode of new branch */ ++ ++ /* ++ * Find matching branch index. For now, this assumes that nothing ++ * has been mounted on top of this Unionfs stack. Once we have /odf ++ * and cache-coherency resolved, we'll address the branch-path ++ * uniqueness. ++ */ ++ err = path_lookup(optarg, LOOKUP_FOLLOW, &nd); ++ if (err) { ++ printk(KERN_ERR "unionfs: error accessing " ++ "lower directory \"%s\" (error %d)\n", ++ optarg, err); ++ goto out; ++ } ++ for (idx = 0; idx < cur_branches; idx++) ++ if (nd.path.mnt == new_lower_paths[idx].mnt && ++ nd.path.dentry == new_lower_paths[idx].dentry) ++ break; ++ path_put(&nd.path); /* no longer needed */ ++ if (idx == cur_branches) { ++ printk(KERN_ERR "unionfs: branch \"%s\" " ++ "not found\n", optarg); ++ err = -ENOENT; ++ goto out; ++ } ++ ++ /* ++ * At this point idx will hold the index where the new branch should ++ * be inserted before. ++ */ ++found_insertion_point: ++ /* find the mode for the new branch */ ++ if (new_branch) ++ modename = strchr(new_branch, '='); ++ if (modename) ++ *modename++ = '\0'; ++ if (!new_branch || !*new_branch) { ++ printk(KERN_ERR "unionfs: null new branch\n"); ++ err = -EINVAL; ++ goto out; ++ } ++ err = parse_branch_mode(modename, &perms); ++ if (err) { ++ printk(KERN_ERR "unionfs: invalid mode \"%s\" for " ++ "branch \"%s\"\n", modename, new_branch); ++ goto out; ++ } ++ err = path_lookup(new_branch, LOOKUP_FOLLOW, &nd); ++ if (err) { ++ printk(KERN_ERR "unionfs: error accessing " ++ "lower directory \"%s\" (error %d)\n", ++ new_branch, err); ++ goto out; ++ } ++ /* ++ * It's probably safe to check_mode the new branch to insert. Note: ++ * we don't allow inserting branches which are unionfs's by ++ * themselves (check_branch returns EINVAL in that case). This is ++ * because this code base doesn't support stacking unionfs: the ODF ++ * code base supports that correctly. ++ */ ++ err = check_branch(&nd); ++ if (err) { ++ printk(KERN_ERR "unionfs: lower directory " ++ "\"%s\" is not a valid branch\n", optarg); ++ path_put(&nd.path); ++ goto out; ++ } ++ ++ /* ++ * Now we have to insert the new branch. But first, move the bits ++ * to make space for the new branch, if needed. Finally, adjust ++ * cur_branches. ++ * We don't release nd here; it's kept until umount/remount. ++ */ ++ if (idx < cur_branches) { ++ /* if idx==cur_branches, we append: easy */ ++ memmove(&new_data[idx+1], &new_data[idx], ++ (cur_branches - idx) * sizeof(struct unionfs_data)); ++ memmove(&new_lower_paths[idx+1], &new_lower_paths[idx], ++ (cur_branches - idx) * sizeof(struct path)); ++ } ++ new_lower_paths[idx].dentry = nd.path.dentry; ++ new_lower_paths[idx].mnt = nd.path.mnt; ++ ++ new_data[idx].sb = nd.path.dentry->d_sb; ++ atomic_set(&new_data[idx].open_files, 0); ++ new_data[idx].branchperms = perms; ++ new_data[idx].branch_id = ++*high_branch_id; /* assign new branch ID */ ++ ++ err = 0; ++out: ++ return err; ++} ++ ++ ++/* ++ * Support branch management options on remount. ++ * ++ * See Documentation/filesystems/unionfs/ for details. ++ * ++ * @flags: numeric mount options ++ * @options: mount options string ++ * ++ * This function can rearrange a mounted union dynamically, adding and ++ * removing branches, including changing branch modes. Clearly this has to ++ * be done safely and atomically. Luckily, the VFS already calls this ++ * function with lock_super(sb) and lock_kernel() held, preventing ++ * concurrent mixing of new mounts, remounts, and unmounts. Moreover, ++ * do_remount_sb(), our caller function, already called shrink_dcache_sb(sb) ++ * to purge dentries/inodes from our superblock, and also called ++ * fsync_super(sb) to purge any dirty pages. So we're good. ++ * ++ * XXX: however, our remount code may also need to invalidate mapped pages ++ * so as to force them to be re-gotten from the (newly reconfigured) lower ++ * branches. This has to wait for proper mmap and cache coherency support ++ * in the VFS. ++ * ++ */ ++static int unionfs_remount_fs(struct super_block *sb, int *flags, ++ char *options) ++{ ++ int err = 0; ++ int i; ++ char *optionstmp, *tmp_to_free; /* kstrdup'ed of "options" */ ++ char *optname; ++ int cur_branches = 0; /* no. of current branches */ ++ int new_branches = 0; /* no. of branches actually left in the end */ ++ int add_branches; /* est. no. of branches to add */ ++ int del_branches; /* est. no. of branches to del */ ++ int max_branches; /* max possible no. of branches */ ++ struct unionfs_data *new_data = NULL, *tmp_data = NULL; ++ struct path *new_lower_paths = NULL, *tmp_lower_paths = NULL; ++ struct inode **new_lower_inodes = NULL; ++ int new_high_branch_id; /* new high branch ID */ ++ int size; /* memory allocation size, temp var */ ++ int old_ibstart, old_ibend; ++ ++ unionfs_write_lock(sb); ++ ++ /* ++ * The VFS will take care of "ro" and "rw" flags, and we can safely ++ * ignore MS_SILENT, but anything else left over is an error. So we ++ * need to check if any other flags may have been passed (none are ++ * allowed/supported as of now). ++ */ ++ if ((*flags & ~(MS_RDONLY | MS_SILENT)) != 0) { ++ printk(KERN_ERR ++ "unionfs: remount flags 0x%x unsupported\n", *flags); ++ err = -EINVAL; ++ goto out_error; ++ } ++ ++ /* ++ * If 'options' is NULL, it's probably because the user just changed ++ * the union to a "ro" or "rw" and the VFS took care of it. So ++ * nothing to do and we're done. ++ */ ++ if (!options || options[0] == '\0') ++ goto out_error; ++ ++ /* ++ * Find out how many branches we will have in the end, counting ++ * "add" and "del" commands. Copy the "options" string because ++ * strsep modifies the string and we need it later. ++ */ ++ tmp_to_free = kstrdup(options, GFP_KERNEL); ++ optionstmp = tmp_to_free; ++ if (unlikely(!optionstmp)) { ++ err = -ENOMEM; ++ goto out_free; ++ } ++ cur_branches = sbmax(sb); /* current no. branches */ ++ new_branches = sbmax(sb); ++ del_branches = 0; ++ add_branches = 0; ++ new_high_branch_id = sbhbid(sb); /* save current high_branch_id */ ++ while ((optname = strsep(&optionstmp, ",")) != NULL) { ++ char *optarg; ++ ++ if (!optname || !*optname) ++ continue; ++ ++ optarg = strchr(optname, '='); ++ if (optarg) ++ *optarg++ = '\0'; ++ ++ if (!strcmp("add", optname)) ++ add_branches++; ++ else if (!strcmp("del", optname)) ++ del_branches++; ++ } ++ kfree(tmp_to_free); ++ /* after all changes, will we have at least one branch left? */ ++ if ((new_branches + add_branches - del_branches) < 1) { ++ printk(KERN_ERR ++ "unionfs: no branches left after remount\n"); ++ err = -EINVAL; ++ goto out_free; ++ } ++ ++ /* ++ * Since we haven't actually parsed all the add/del options, nor ++ * have we checked them for errors, we don't know for sure how many ++ * branches we will have after all changes have taken place. In ++ * fact, the total number of branches left could be less than what ++ * we have now. So we need to allocate space for a temporary ++ * placeholder that is at least as large as the maximum number of ++ * branches we *could* have, which is the current number plus all ++ * the additions. Once we're done with these temp placeholders, we ++ * may have to re-allocate the final size, copy over from the temp, ++ * and then free the temps (done near the end of this function). ++ */ ++ max_branches = cur_branches + add_branches; ++ /* allocate space for new pointers to lower dentry */ ++ tmp_data = kcalloc(max_branches, ++ sizeof(struct unionfs_data), GFP_KERNEL); ++ if (unlikely(!tmp_data)) { ++ err = -ENOMEM; ++ goto out_free; ++ } ++ /* allocate space for new pointers to lower paths */ ++ tmp_lower_paths = kcalloc(max_branches, ++ sizeof(struct path), GFP_KERNEL); ++ if (unlikely(!tmp_lower_paths)) { ++ err = -ENOMEM; ++ goto out_free; ++ } ++ /* copy current info into new placeholders, incrementing refcnts */ ++ memcpy(tmp_data, UNIONFS_SB(sb)->data, ++ cur_branches * sizeof(struct unionfs_data)); ++ memcpy(tmp_lower_paths, UNIONFS_D(sb->s_root)->lower_paths, ++ cur_branches * sizeof(struct path)); ++ for (i = 0; i < cur_branches; i++) ++ path_get(&tmp_lower_paths[i]); /* drop refs at end of fxn */ ++ ++ /******************************************************************* ++ * For each branch command, do path_lookup on the requested branch, ++ * and apply the change to a temp branch list. To handle errors, we ++ * already dup'ed the old arrays (above), and increased the refcnts ++ * on various f/s objects. So now we can do all the path_lookups ++ * and branch-management commands on the new arrays. If it fail mid ++ * way, we free the tmp arrays and *put all objects. If we succeed, ++ * then we free old arrays and *put its objects, and then replace ++ * the arrays with the new tmp list (we may have to re-allocate the ++ * memory because the temp lists could have been larger than what we ++ * actually needed). ++ *******************************************************************/ ++ ++ while ((optname = strsep(&options, ",")) != NULL) { ++ char *optarg; ++ ++ if (!optname || !*optname) ++ continue; ++ /* ++ * At this stage optname holds a comma-delimited option, but ++ * without the commas. Next, we need to break the string on ++ * the '=' symbol to separate CMD=ARG, where ARG itself can ++ * be KEY=VAL. For example, in mode=/foo=rw, CMD is "mode", ++ * KEY is "/foo", and VAL is "rw". ++ */ ++ optarg = strchr(optname, '='); ++ if (optarg) ++ *optarg++ = '\0'; ++ /* incgen remount option (instead of old ioctl) */ ++ if (!strcmp("incgen", optname)) { ++ err = 0; ++ goto out_no_change; ++ } ++ ++ /* ++ * All of our options take an argument now. (Insert ones ++ * that don't above this check.) So at this stage optname ++ * contains the CMD part and optarg contains the ARG part. ++ */ ++ if (!optarg || !*optarg) { ++ printk(KERN_ERR "unionfs: all remount options require " ++ "an argument (%s)\n", optname); ++ err = -EINVAL; ++ goto out_release; ++ } ++ ++ if (!strcmp("add", optname)) { ++ err = do_remount_add_option(optarg, new_branches, ++ tmp_data, ++ tmp_lower_paths, ++ &new_high_branch_id); ++ if (err) ++ goto out_release; ++ new_branches++; ++ if (new_branches > UNIONFS_MAX_BRANCHES) { ++ printk(KERN_ERR "unionfs: command exceeds " ++ "%d branches\n", UNIONFS_MAX_BRANCHES); ++ err = -E2BIG; ++ goto out_release; ++ } ++ continue; ++ } ++ if (!strcmp("del", optname)) { ++ err = do_remount_del_option(optarg, new_branches, ++ tmp_data, ++ tmp_lower_paths); ++ if (err) ++ goto out_release; ++ new_branches--; ++ continue; ++ } ++ if (!strcmp("mode", optname)) { ++ err = do_remount_mode_option(optarg, new_branches, ++ tmp_data, ++ tmp_lower_paths); ++ if (err) ++ goto out_release; ++ continue; ++ } ++ ++ /* ++ * When you use "mount -o remount,ro", mount(8) will ++ * reportedly pass the original dirs= string from ++ * /proc/mounts. So for now, we have to ignore dirs= and ++ * not consider it an error, unless we want to allow users ++ * to pass dirs= in remount. Note that to allow the VFS to ++ * actually process the ro/rw remount options, we have to ++ * return 0 from this function. ++ */ ++ if (!strcmp("dirs", optname)) { ++ printk(KERN_WARNING ++ "unionfs: remount ignoring option \"%s\"\n", ++ optname); ++ continue; ++ } ++ ++ err = -EINVAL; ++ printk(KERN_ERR ++ "unionfs: unrecognized option \"%s\"\n", optname); ++ goto out_release; ++ } ++ ++out_no_change: ++ ++ /****************************************************************** ++ * WE'RE ALMOST DONE: check if leftmost branch might be read-only, ++ * see if we need to allocate a small-sized new vector, copy the ++ * vectors to their correct place, release the refcnt of the older ++ * ones, and return. Also handle invalidating any pages that will ++ * have to be re-read. ++ *******************************************************************/ ++ ++ if (!(tmp_data[0].branchperms & MAY_WRITE)) { ++ printk(KERN_ERR "unionfs: leftmost branch cannot be read-only " ++ "(use \"remount,ro\" to create a read-only union)\n"); ++ err = -EINVAL; ++ goto out_release; ++ } ++ ++ /* (re)allocate space for new pointers to lower dentry */ ++ size = new_branches * sizeof(struct unionfs_data); ++ new_data = krealloc(tmp_data, size, GFP_KERNEL); ++ if (unlikely(!new_data)) { ++ err = -ENOMEM; ++ goto out_release; ++ } ++ ++ /* allocate space for new pointers to lower paths */ ++ size = new_branches * sizeof(struct path); ++ new_lower_paths = krealloc(tmp_lower_paths, size, GFP_KERNEL); ++ if (unlikely(!new_lower_paths)) { ++ err = -ENOMEM; ++ goto out_release; ++ } ++ ++ /* allocate space for new pointers to lower inodes */ ++ new_lower_inodes = kcalloc(new_branches, ++ sizeof(struct inode *), GFP_KERNEL); ++ if (unlikely(!new_lower_inodes)) { ++ err = -ENOMEM; ++ goto out_release; ++ } ++ ++ /* ++ * OK, just before we actually put the new set of branches in place, ++ * we need to ensure that our own f/s has no dirty objects left. ++ * Luckily, do_remount_sb() already calls shrink_dcache_sb(sb) and ++ * fsync_super(sb), taking care of dentries, inodes, and dirty ++ * pages. So all that's left is for us to invalidate any leftover ++ * (non-dirty) pages to ensure that they will be re-read from the ++ * new lower branches (and to support mmap). ++ */ ++ ++ /* ++ * Once we finish the remounting successfully, our superblock ++ * generation number will have increased. This will be detected by ++ * our dentry-revalidation code upon subsequent f/s operations ++ * through unionfs. The revalidation code will rebuild the union of ++ * lower inodes for a given unionfs inode and invalidate any pages ++ * of such "stale" inodes (by calling our purge_inode_data ++ * function). This revalidation will happen lazily and ++ * incrementally, as users perform operations on cached inodes. We ++ * would like to encourage this revalidation to happen sooner if ++ * possible, so we like to try to invalidate as many other pages in ++ * our superblock as we can. We used to call drop_pagecache_sb() or ++ * a variant thereof, but either method was racy (drop_caches alone ++ * is known to be racy). So now we let the revalidation happen on a ++ * per file basis in ->d_revalidate. ++ */ ++ ++ /* grab new lower super references; release old ones */ ++ for (i = 0; i < new_branches; i++) ++ atomic_inc(&new_data[i].sb->s_active); ++ for (i = 0; i < sbmax(sb); i++) ++ atomic_dec(&UNIONFS_SB(sb)->data[i].sb->s_active); ++ ++ /* copy new vectors into their correct place */ ++ tmp_data = UNIONFS_SB(sb)->data; ++ UNIONFS_SB(sb)->data = new_data; ++ new_data = NULL; /* so don't free good pointers below */ ++ tmp_lower_paths = UNIONFS_D(sb->s_root)->lower_paths; ++ UNIONFS_D(sb->s_root)->lower_paths = new_lower_paths; ++ new_lower_paths = NULL; /* so don't free good pointers below */ ++ ++ /* update our unionfs_sb_info and root dentry index of last branch */ ++ i = sbmax(sb); /* save no. of branches to release at end */ ++ sbend(sb) = new_branches - 1; ++ dbend(sb->s_root) = new_branches - 1; ++ old_ibstart = ibstart(sb->s_root->d_inode); ++ old_ibend = ibend(sb->s_root->d_inode); ++ ibend(sb->s_root->d_inode) = new_branches - 1; ++ UNIONFS_D(sb->s_root)->bcount = new_branches; ++ new_branches = i; /* no. of branches to release below */ ++ ++ /* ++ * Update lower inodes: 3 steps ++ * 1. grab ref on all new lower inodes ++ */ ++ for (i = dbstart(sb->s_root); i <= dbend(sb->s_root); i++) { ++ struct dentry *lower_dentry = ++ unionfs_lower_dentry_idx(sb->s_root, i); ++ igrab(lower_dentry->d_inode); ++ new_lower_inodes[i] = lower_dentry->d_inode; ++ } ++ /* 2. release reference on all older lower inodes */ ++ iput_lowers(sb->s_root->d_inode, old_ibstart, old_ibend, true); ++ /* 3. update root dentry's inode to new lower_inodes array */ ++ UNIONFS_I(sb->s_root->d_inode)->lower_inodes = new_lower_inodes; ++ new_lower_inodes = NULL; ++ ++ /* maxbytes may have changed */ ++ sb->s_maxbytes = unionfs_lower_super_idx(sb, 0)->s_maxbytes; ++ /* update high branch ID */ ++ sbhbid(sb) = new_high_branch_id; ++ ++ /* update our sb->generation for revalidating objects */ ++ i = atomic_inc_return(&UNIONFS_SB(sb)->generation); ++ atomic_set(&UNIONFS_D(sb->s_root)->generation, i); ++ atomic_set(&UNIONFS_I(sb->s_root->d_inode)->generation, i); ++ if (!(*flags & MS_SILENT)) ++ pr_info("unionfs: %s: new generation number %d\n", ++ UNIONFS_SB(sb)->dev_name, i); ++ /* finally, update the root dentry's times */ ++ unionfs_copy_attr_times(sb->s_root->d_inode); ++ err = 0; /* reset to success */ ++ ++ /* ++ * The code above falls through to the next label, and releases the ++ * refcnts of the older ones (stored in tmp_*): if we fell through ++ * here, it means success. However, if we jump directly to this ++ * label from any error above, then an error occurred after we ++ * grabbed various refcnts, and so we have to release the ++ * temporarily constructed structures. ++ */ ++out_release: ++ /* no need to cleanup/release anything in tmp_data */ ++ if (tmp_lower_paths) ++ for (i = 0; i < new_branches; i++) ++ path_put(&tmp_lower_paths[i]); ++out_free: ++ kfree(tmp_lower_paths); ++ kfree(tmp_data); ++ kfree(new_lower_paths); ++ kfree(new_data); ++ kfree(new_lower_inodes); ++out_error: ++ unionfs_check_dentry(sb->s_root); ++ unionfs_write_unlock(sb); ++ return err; ++} ++ ++/* ++ * Called by iput() when the inode reference count reached zero ++ * and the inode is not hashed anywhere. Used to clear anything ++ * that needs to be, before the inode is completely destroyed and put ++ * on the inode free list. ++ * ++ * No need to lock sb info's rwsem. ++ */ ++static void unionfs_evict_inode(struct inode *inode) ++{ ++ int bindex, bstart, bend; ++ struct inode *lower_inode; ++ struct list_head *pos, *n; ++ struct unionfs_dir_state *rdstate; ++ ++ truncate_inode_pages(&inode->i_data, 0); ++ end_writeback(inode); ++ ++ list_for_each_safe(pos, n, &UNIONFS_I(inode)->readdircache) { ++ rdstate = list_entry(pos, struct unionfs_dir_state, cache); ++ list_del(&rdstate->cache); ++ free_rdstate(rdstate); ++ } ++ ++ /* ++ * Decrement a reference to a lower_inode, which was incremented ++ * by our read_inode when it was created initially. ++ */ ++ bstart = ibstart(inode); ++ bend = ibend(inode); ++ if (bstart >= 0) { ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ lower_inode = unionfs_lower_inode_idx(inode, bindex); ++ if (!lower_inode) ++ continue; ++ unionfs_set_lower_inode_idx(inode, bindex, NULL); ++ /* see Documentation/filesystems/unionfs/issues.txt */ ++ lockdep_off(); ++ iput(lower_inode); ++ lockdep_on(); ++ } ++ } ++ ++ kfree(UNIONFS_I(inode)->lower_inodes); ++ UNIONFS_I(inode)->lower_inodes = NULL; ++} ++ ++static struct inode *unionfs_alloc_inode(struct super_block *sb) ++{ ++ struct unionfs_inode_info *i; ++ ++ i = kmem_cache_alloc(unionfs_inode_cachep, GFP_KERNEL); ++ if (unlikely(!i)) ++ return NULL; ++ ++ /* memset everything up to the inode to 0 */ ++ memset(i, 0, offsetof(struct unionfs_inode_info, vfs_inode)); ++ ++ i->vfs_inode.i_version = 1; ++ return &i->vfs_inode; ++} ++ ++static void unionfs_destroy_inode(struct inode *inode) ++{ ++ kmem_cache_free(unionfs_inode_cachep, UNIONFS_I(inode)); ++} ++ ++/* unionfs inode cache constructor */ ++static void init_once(void *obj) ++{ ++ struct unionfs_inode_info *i = obj; ++ ++ inode_init_once(&i->vfs_inode); ++} ++ ++int unionfs_init_inode_cache(void) ++{ ++ int err = 0; ++ ++ unionfs_inode_cachep = ++ kmem_cache_create("unionfs_inode_cache", ++ sizeof(struct unionfs_inode_info), 0, ++ SLAB_RECLAIM_ACCOUNT, init_once); ++ if (unlikely(!unionfs_inode_cachep)) ++ err = -ENOMEM; ++ return err; ++} ++ ++/* unionfs inode cache destructor */ ++void unionfs_destroy_inode_cache(void) ++{ ++ if (unionfs_inode_cachep) ++ kmem_cache_destroy(unionfs_inode_cachep); ++} ++ ++/* ++ * Called when we have a dirty inode, right here we only throw out ++ * parts of our readdir list that are too old. ++ * ++ * No need to grab sb info's rwsem. ++ */ ++static int unionfs_write_inode(struct inode *inode, ++ struct writeback_control *wbc) ++{ ++ struct list_head *pos, *n; ++ struct unionfs_dir_state *rdstate; ++ ++ spin_lock(&UNIONFS_I(inode)->rdlock); ++ list_for_each_safe(pos, n, &UNIONFS_I(inode)->readdircache) { ++ rdstate = list_entry(pos, struct unionfs_dir_state, cache); ++ /* We keep this list in LRU order. */ ++ if ((rdstate->access + RDCACHE_JIFFIES) > jiffies) ++ break; ++ UNIONFS_I(inode)->rdcount--; ++ list_del(&rdstate->cache); ++ free_rdstate(rdstate); ++ } ++ spin_unlock(&UNIONFS_I(inode)->rdlock); ++ ++ return 0; ++} ++ ++/* ++ * Used only in nfs, to kill any pending RPC tasks, so that subsequent ++ * code can actually succeed and won't leave tasks that need handling. ++ */ ++static void unionfs_umount_begin(struct super_block *sb) ++{ ++ struct super_block *lower_sb; ++ int bindex, bstart, bend; ++ ++ unionfs_read_lock(sb, UNIONFS_SMUTEX_CHILD); ++ ++ bstart = sbstart(sb); ++ bend = sbend(sb); ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ lower_sb = unionfs_lower_super_idx(sb, bindex); ++ ++ if (lower_sb && lower_sb->s_op && ++ lower_sb->s_op->umount_begin) ++ lower_sb->s_op->umount_begin(lower_sb); ++ } ++ ++ unionfs_read_unlock(sb); ++} ++ ++static int unionfs_show_options(struct seq_file *m, struct vfsmount *mnt) ++{ ++ struct super_block *sb = mnt->mnt_sb; ++ int ret = 0; ++ char *tmp_page; ++ char *path; ++ int bindex, bstart, bend; ++ int perms; ++ ++ unionfs_read_lock(sb, UNIONFS_SMUTEX_CHILD); ++ ++ unionfs_lock_dentry(sb->s_root, UNIONFS_DMUTEX_CHILD); ++ ++ tmp_page = (char *) __get_free_page(GFP_KERNEL); ++ if (unlikely(!tmp_page)) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ++ bstart = sbstart(sb); ++ bend = sbend(sb); ++ ++ seq_printf(m, ",dirs="); ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ struct path p; ++ p.dentry = unionfs_lower_dentry_idx(sb->s_root, bindex); ++ p.mnt = unionfs_lower_mnt_idx(sb->s_root, bindex); ++ path = d_path(&p, tmp_page, PAGE_SIZE); ++ if (IS_ERR(path)) { ++ ret = PTR_ERR(path); ++ goto out; ++ } ++ ++ perms = branchperms(sb, bindex); ++ ++ seq_printf(m, "%s=%s", path, ++ perms & MAY_WRITE ? "rw" : "ro"); ++ if (bindex != bend) ++ seq_printf(m, ":"); ++ } ++ ++out: ++ free_page((unsigned long) tmp_page); ++ ++ unionfs_unlock_dentry(sb->s_root); ++ ++ unionfs_read_unlock(sb); ++ ++ return ret; ++} ++ ++struct super_operations unionfs_sops = { ++ .put_super = unionfs_put_super, ++ .statfs = unionfs_statfs, ++ .remount_fs = unionfs_remount_fs, ++ .evict_inode = unionfs_evict_inode, ++ .umount_begin = unionfs_umount_begin, ++ .show_options = unionfs_show_options, ++ .write_inode = unionfs_write_inode, ++ .alloc_inode = unionfs_alloc_inode, ++ .destroy_inode = unionfs_destroy_inode, ++}; +diff --git a/fs/unionfs/union.h b/fs/unionfs/union.h +new file mode 100644 +index 0000000..d49c834 +--- /dev/null ++++ b/fs/unionfs/union.h +@@ -0,0 +1,669 @@ ++/* ++ * Copyright (c) 2003-2010 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2010 Stony Brook University ++ * Copyright (c) 2003-2010 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#ifndef _UNION_H_ ++#define _UNION_H_ ++ ++#include <linux/dcache.h> ++#include <linux/file.h> ++#include <linux/list.h> ++#include <linux/fs.h> ++#include <linux/mm.h> ++#include <linux/module.h> ++#include <linux/mount.h> ++#include <linux/namei.h> ++#include <linux/page-flags.h> ++#include <linux/pagemap.h> ++#include <linux/poll.h> ++#include <linux/security.h> ++#include <linux/seq_file.h> ++#include <linux/slab.h> ++#include <linux/spinlock.h> ++#include <linux/smp_lock.h> ++#include <linux/statfs.h> ++#include <linux/string.h> ++#include <linux/vmalloc.h> ++#include <linux/writeback.h> ++#include <linux/buffer_head.h> ++#include <linux/xattr.h> ++#include <linux/fs_stack.h> ++#include <linux/magic.h> ++#include <linux/log2.h> ++#include <linux/poison.h> ++#include <linux/mman.h> ++#include <linux/backing-dev.h> ++#include <linux/splice.h> ++ ++#include <asm/system.h> ++ ++#include <linux/union_fs.h> ++ ++/* the file system name */ ++#define UNIONFS_NAME "unionfs" ++ ++/* unionfs root inode number */ ++#define UNIONFS_ROOT_INO 1 ++ ++/* number of times we try to get a unique temporary file name */ ++#define GET_TMPNAM_MAX_RETRY 5 ++ ++/* maximum number of branches we support, to avoid memory blowup */ ++#define UNIONFS_MAX_BRANCHES 128 ++ ++/* minimum time (seconds) required for time-based cache-coherency */ ++#define UNIONFS_MIN_CC_TIME 3 ++ ++/* Operations vectors defined in specific files. */ ++extern struct file_operations unionfs_main_fops; ++extern struct file_operations unionfs_dir_fops; ++extern struct inode_operations unionfs_main_iops; ++extern struct inode_operations unionfs_dir_iops; ++extern struct inode_operations unionfs_symlink_iops; ++extern struct super_operations unionfs_sops; ++extern struct dentry_operations unionfs_dops; ++extern struct address_space_operations unionfs_aops, unionfs_dummy_aops; ++extern struct vm_operations_struct unionfs_vm_ops; ++ ++/* How long should an entry be allowed to persist */ ++#define RDCACHE_JIFFIES (5*HZ) ++ ++/* compatibility with Real-Time patches */ ++#ifdef CONFIG_PREEMPT_RT ++# define unionfs_rw_semaphore compat_rw_semaphore ++#else /* not CONFIG_PREEMPT_RT */ ++# define unionfs_rw_semaphore rw_semaphore ++#endif /* not CONFIG_PREEMPT_RT */ ++ ++/* file private data. */ ++struct unionfs_file_info { ++ int bstart; ++ int bend; ++ atomic_t generation; ++ ++ struct unionfs_dir_state *rdstate; ++ struct file **lower_files; ++ int *saved_branch_ids; /* IDs of branches when file was opened */ ++ const struct vm_operations_struct *lower_vm_ops; ++ bool wrote_to_file; /* for delayed copyup */ ++}; ++ ++/* unionfs inode data in memory */ ++struct unionfs_inode_info { ++ int bstart; ++ int bend; ++ atomic_t generation; ++ /* Stuff for readdir over NFS. */ ++ spinlock_t rdlock; ++ struct list_head readdircache; ++ int rdcount; ++ int hashsize; ++ int cookie; ++ ++ /* The lower inodes */ ++ struct inode **lower_inodes; ++ ++ struct inode vfs_inode; ++}; ++ ++/* unionfs dentry data in memory */ ++struct unionfs_dentry_info { ++ /* ++ * The semaphore is used to lock the dentry as soon as we get into a ++ * unionfs function from the VFS. Our lock ordering is that children ++ * go before their parents. ++ */ ++ struct mutex lock; ++ int bstart; ++ int bend; ++ int bopaque; ++ int bcount; ++ atomic_t generation; ++ struct path *lower_paths; ++}; ++ ++/* These are the pointers to our various objects. */ ++struct unionfs_data { ++ struct super_block *sb; /* lower super_block */ ++ atomic_t open_files; /* number of open files on branch */ ++ int branchperms; ++ int branch_id; /* unique branch ID at re/mount time */ ++}; ++ ++/* unionfs super-block data in memory */ ++struct unionfs_sb_info { ++ int bend; ++ ++ atomic_t generation; ++ ++ /* ++ * This rwsem is used to make sure that a branch management ++ * operation... ++ * 1) will not begin before all currently in-flight operations ++ * complete. ++ * 2) any new operations do not execute until the currently ++ * running branch management operation completes. ++ * ++ * The write_lock_owner records the PID of the task which grabbed ++ * the rw_sem for writing. If the same task also tries to grab the ++ * read lock, we allow it. This prevents a self-deadlock when ++ * branch-management is used on a pivot_root'ed union, because we ++ * have to ->lookup paths which belong to the same union. ++ */ ++ struct unionfs_rw_semaphore rwsem; ++ pid_t write_lock_owner; /* PID of rw_sem owner (write lock) */ ++ int high_branch_id; /* last unique branch ID given */ ++ char *dev_name; /* to identify different unions in pr_debug */ ++ struct unionfs_data *data; ++}; ++ ++/* ++ * structure for making the linked list of entries by readdir on left branch ++ * to compare with entries on right branch ++ */ ++struct filldir_node { ++ struct list_head file_list; /* list for directory entries */ ++ char *name; /* name entry */ ++ int hash; /* name hash */ ++ int namelen; /* name len since name is not 0 terminated */ ++ ++ /* ++ * we can check for duplicate whiteouts and files in the same branch ++ * in order to return -EIO. ++ */ ++ int bindex; ++ ++ /* is this a whiteout entry? */ ++ int whiteout; ++ ++ /* Inline name, so we don't need to separately kmalloc small ones */ ++ char iname[DNAME_INLINE_LEN_MIN]; ++}; ++ ++/* Directory hash table. */ ++struct unionfs_dir_state { ++ unsigned int cookie; /* the cookie, based off of rdversion */ ++ unsigned int offset; /* The entry we have returned. */ ++ int bindex; ++ loff_t dirpos; /* offset within the lower level directory */ ++ int size; /* How big is the hash table? */ ++ int hashentries; /* How many entries have been inserted? */ ++ unsigned long access; ++ ++ /* This cache list is used when the inode keeps us around. */ ++ struct list_head cache; ++ struct list_head list[0]; ++}; ++ ++/* externs needed for fanout.h or sioq.h */ ++extern int unionfs_get_nlinks(const struct inode *inode); ++extern void unionfs_copy_attr_times(struct inode *upper); ++extern void unionfs_copy_attr_all(struct inode *dest, const struct inode *src); ++ ++/* include miscellaneous macros */ ++#include "fanout.h" ++#include "sioq.h" ++ ++/* externs for cache creation/deletion routines */ ++extern void unionfs_destroy_filldir_cache(void); ++extern int unionfs_init_filldir_cache(void); ++extern int unionfs_init_inode_cache(void); ++extern void unionfs_destroy_inode_cache(void); ++extern int unionfs_init_dentry_cache(void); ++extern void unionfs_destroy_dentry_cache(void); ++ ++/* Initialize and free readdir-specific state. */ ++extern int init_rdstate(struct file *file); ++extern struct unionfs_dir_state *alloc_rdstate(struct inode *inode, ++ int bindex); ++extern struct unionfs_dir_state *find_rdstate(struct inode *inode, ++ loff_t fpos); ++extern void free_rdstate(struct unionfs_dir_state *state); ++extern int add_filldir_node(struct unionfs_dir_state *rdstate, ++ const char *name, int namelen, int bindex, ++ int whiteout); ++extern struct filldir_node *find_filldir_node(struct unionfs_dir_state *rdstate, ++ const char *name, int namelen, ++ int is_whiteout); ++ ++extern struct dentry **alloc_new_dentries(int objs); ++extern struct unionfs_data *alloc_new_data(int objs); ++ ++/* We can only use 32-bits of offset for rdstate --- blech! */ ++#define DIREOF (0xfffff) ++#define RDOFFBITS 20 /* This is the number of bits in DIREOF. */ ++#define MAXRDCOOKIE (0xfff) ++/* Turn an rdstate into an offset. */ ++static inline off_t rdstate2offset(struct unionfs_dir_state *buf) ++{ ++ off_t tmp; ++ ++ tmp = ((buf->cookie & MAXRDCOOKIE) << RDOFFBITS) ++ | (buf->offset & DIREOF); ++ return tmp; ++} ++ ++/* Macros for locking a super_block. */ ++enum unionfs_super_lock_class { ++ UNIONFS_SMUTEX_NORMAL, ++ UNIONFS_SMUTEX_PARENT, /* when locking on behalf of file */ ++ UNIONFS_SMUTEX_CHILD, /* when locking on behalf of dentry */ ++}; ++static inline void unionfs_read_lock(struct super_block *sb, int subclass) ++{ ++ if (UNIONFS_SB(sb)->write_lock_owner && ++ UNIONFS_SB(sb)->write_lock_owner == current->pid) ++ return; ++ down_read_nested(&UNIONFS_SB(sb)->rwsem, subclass); ++} ++static inline void unionfs_read_unlock(struct super_block *sb) ++{ ++ if (UNIONFS_SB(sb)->write_lock_owner && ++ UNIONFS_SB(sb)->write_lock_owner == current->pid) ++ return; ++ up_read(&UNIONFS_SB(sb)->rwsem); ++} ++static inline void unionfs_write_lock(struct super_block *sb) ++{ ++ down_write(&UNIONFS_SB(sb)->rwsem); ++ UNIONFS_SB(sb)->write_lock_owner = current->pid; ++} ++static inline void unionfs_write_unlock(struct super_block *sb) ++{ ++ up_write(&UNIONFS_SB(sb)->rwsem); ++ UNIONFS_SB(sb)->write_lock_owner = 0; ++} ++ ++static inline void unionfs_double_lock_dentry(struct dentry *d1, ++ struct dentry *d2) ++{ ++ BUG_ON(d1 == d2); ++ if (d1 < d2) { ++ unionfs_lock_dentry(d1, UNIONFS_DMUTEX_PARENT); ++ unionfs_lock_dentry(d2, UNIONFS_DMUTEX_CHILD); ++ } else { ++ unionfs_lock_dentry(d2, UNIONFS_DMUTEX_PARENT); ++ unionfs_lock_dentry(d1, UNIONFS_DMUTEX_CHILD); ++ } ++} ++ ++static inline void unionfs_double_unlock_dentry(struct dentry *d1, ++ struct dentry *d2) ++{ ++ BUG_ON(d1 == d2); ++ if (d1 < d2) { /* unlock in reverse order than double_lock_dentry */ ++ unionfs_unlock_dentry(d1); ++ unionfs_unlock_dentry(d2); ++ } else { ++ unionfs_unlock_dentry(d2); ++ unionfs_unlock_dentry(d1); ++ } ++} ++ ++static inline void unionfs_double_lock_parents(struct dentry *p1, ++ struct dentry *p2) ++{ ++ if (p1 == p2) { ++ unionfs_lock_dentry(p1, UNIONFS_DMUTEX_REVAL_PARENT); ++ return; ++ } ++ if (p1 < p2) { ++ unionfs_lock_dentry(p1, UNIONFS_DMUTEX_REVAL_PARENT); ++ unionfs_lock_dentry(p2, UNIONFS_DMUTEX_REVAL_CHILD); ++ } else { ++ unionfs_lock_dentry(p2, UNIONFS_DMUTEX_REVAL_PARENT); ++ unionfs_lock_dentry(p1, UNIONFS_DMUTEX_REVAL_CHILD); ++ } ++} ++ ++static inline void unionfs_double_unlock_parents(struct dentry *p1, ++ struct dentry *p2) ++{ ++ if (p1 == p2) { ++ unionfs_unlock_dentry(p1); ++ return; ++ } ++ if (p1 < p2) { /* unlock in reverse order of double_lock_parents */ ++ unionfs_unlock_dentry(p1); ++ unionfs_unlock_dentry(p2); ++ } else { ++ unionfs_unlock_dentry(p2); ++ unionfs_unlock_dentry(p1); ++ } ++} ++ ++extern int new_dentry_private_data(struct dentry *dentry, int subclass); ++extern int realloc_dentry_private_data(struct dentry *dentry); ++extern void free_dentry_private_data(struct dentry *dentry); ++extern void update_bstart(struct dentry *dentry); ++extern int init_lower_nd(struct nameidata *nd, unsigned int flags); ++extern void release_lower_nd(struct nameidata *nd, int err); ++ ++/* ++ * EXTERNALS: ++ */ ++ ++/* replicates the directory structure up to given dentry in given branch */ ++extern struct dentry *create_parents(struct inode *dir, struct dentry *dentry, ++ const char *name, int bindex); ++ ++/* partial lookup */ ++extern int unionfs_partial_lookup(struct dentry *dentry, ++ struct dentry *parent); ++extern struct dentry *unionfs_lookup_full(struct dentry *dentry, ++ struct dentry *parent, ++ int lookupmode); ++ ++/* copies a file from dbstart to newbindex branch */ ++extern int copyup_file(struct inode *dir, struct file *file, int bstart, ++ int newbindex, loff_t size); ++extern int copyup_named_file(struct inode *dir, struct file *file, ++ char *name, int bstart, int new_bindex, ++ loff_t len); ++/* copies a dentry from dbstart to newbindex branch */ ++extern int copyup_dentry(struct inode *dir, struct dentry *dentry, ++ int bstart, int new_bindex, const char *name, ++ int namelen, struct file **copyup_file, loff_t len); ++/* helper functions for post-copyup actions */ ++extern void unionfs_postcopyup_setmnt(struct dentry *dentry); ++extern void unionfs_postcopyup_release(struct dentry *dentry); ++ ++/* Is this directory empty: 0 if it is empty, -ENOTEMPTY if not. */ ++extern int check_empty(struct dentry *dentry, struct dentry *parent, ++ struct unionfs_dir_state **namelist); ++/* whiteout and opaque directory helpers */ ++extern char *alloc_whname(const char *name, int len); ++extern bool is_whiteout_name(char **namep, int *namelenp); ++extern bool is_validname(const char *name); ++extern struct dentry *lookup_whiteout(const char *name, ++ struct dentry *lower_parent); ++extern struct dentry *find_first_whiteout(struct dentry *dentry); ++extern int unlink_whiteout(struct dentry *wh_dentry); ++extern int check_unlink_whiteout(struct dentry *dentry, ++ struct dentry *lower_dentry, int bindex); ++extern int create_whiteout(struct dentry *dentry, int start); ++extern int delete_whiteouts(struct dentry *dentry, int bindex, ++ struct unionfs_dir_state *namelist); ++extern int is_opaque_dir(struct dentry *dentry, int bindex); ++extern int make_dir_opaque(struct dentry *dir, int bindex); ++extern void unionfs_set_max_namelen(long *namelen); ++ ++extern void unionfs_reinterpose(struct dentry *this_dentry); ++extern struct super_block *unionfs_duplicate_super(struct super_block *sb); ++ ++/* Locking functions. */ ++extern int unionfs_setlk(struct file *file, int cmd, struct file_lock *fl); ++extern int unionfs_getlk(struct file *file, struct file_lock *fl); ++ ++/* Common file operations. */ ++extern int unionfs_file_revalidate(struct file *file, struct dentry *parent, ++ bool willwrite); ++extern int unionfs_open(struct inode *inode, struct file *file); ++extern int unionfs_file_release(struct inode *inode, struct file *file); ++extern int unionfs_flush(struct file *file, fl_owner_t id); ++extern long unionfs_ioctl(struct file *file, unsigned int cmd, ++ unsigned long arg); ++extern int unionfs_fsync(struct file *file, int datasync); ++extern int unionfs_fasync(int fd, struct file *file, int flag); ++ ++/* Inode operations */ ++extern struct inode *unionfs_iget(struct super_block *sb, unsigned long ino); ++extern int unionfs_rename(struct inode *old_dir, struct dentry *old_dentry, ++ struct inode *new_dir, struct dentry *new_dentry); ++extern int unionfs_unlink(struct inode *dir, struct dentry *dentry); ++extern int unionfs_rmdir(struct inode *dir, struct dentry *dentry); ++ ++extern bool __unionfs_d_revalidate(struct dentry *dentry, ++ struct dentry *parent, bool willwrite); ++extern bool is_negative_lower(const struct dentry *dentry); ++extern bool is_newer_lower(const struct dentry *dentry); ++extern void purge_sb_data(struct super_block *sb); ++ ++/* The values for unionfs_interpose's flag. */ ++#define INTERPOSE_DEFAULT 0 ++#define INTERPOSE_LOOKUP 1 ++#define INTERPOSE_REVAL 2 ++#define INTERPOSE_REVAL_NEG 3 ++#define INTERPOSE_PARTIAL 4 ++ ++extern struct dentry *unionfs_interpose(struct dentry *this_dentry, ++ struct super_block *sb, int flag); ++ ++#ifdef CONFIG_UNION_FS_XATTR ++/* Extended attribute functions. */ ++extern void *unionfs_xattr_alloc(size_t size, size_t limit); ++static inline void unionfs_xattr_kfree(const void *p) ++{ ++ kfree(p); ++} ++extern ssize_t unionfs_getxattr(struct dentry *dentry, const char *name, ++ void *value, size_t size); ++extern int unionfs_removexattr(struct dentry *dentry, const char *name); ++extern ssize_t unionfs_listxattr(struct dentry *dentry, char *list, ++ size_t size); ++extern int unionfs_setxattr(struct dentry *dentry, const char *name, ++ const void *value, size_t size, int flags); ++#endif /* CONFIG_UNION_FS_XATTR */ ++ ++/* The root directory is unhashed, but isn't deleted. */ ++static inline int d_deleted(struct dentry *d) ++{ ++ return d_unhashed(d) && (d != d->d_sb->s_root); ++} ++ ++/* unionfs_permission, check if we should bypass error to facilitate copyup */ ++#define IS_COPYUP_ERR(err) ((err) == -EROFS) ++ ++/* unionfs_open, check if we need to copyup the file */ ++#define OPEN_WRITE_FLAGS (O_WRONLY | O_RDWR | O_APPEND) ++#define IS_WRITE_FLAG(flag) ((flag) & OPEN_WRITE_FLAGS) ++ ++static inline int branchperms(const struct super_block *sb, int index) ++{ ++ BUG_ON(index < 0); ++ return UNIONFS_SB(sb)->data[index].branchperms; ++} ++ ++static inline int set_branchperms(struct super_block *sb, int index, int perms) ++{ ++ BUG_ON(index < 0); ++ UNIONFS_SB(sb)->data[index].branchperms = perms; ++ return perms; ++} ++ ++/* check if readonly lower inode, but possibly unlinked (no inode->i_sb) */ ++static inline int __is_rdonly(const struct inode *inode) ++{ ++ /* if unlinked, can't be readonly (?) */ ++ if (!inode->i_sb) ++ return 0; ++ return IS_RDONLY(inode); ++ ++} ++/* Is this file on a read-only branch? */ ++static inline int is_robranch_super(const struct super_block *sb, int index) ++{ ++ int ret; ++ ++ ret = (!(branchperms(sb, index) & MAY_WRITE)) ? -EROFS : 0; ++ return ret; ++} ++ ++/* Is this file on a read-only branch? */ ++static inline int is_robranch_idx(const struct dentry *dentry, int index) ++{ ++ struct super_block *lower_sb; ++ ++ BUG_ON(index < 0); ++ ++ if (!(branchperms(dentry->d_sb, index) & MAY_WRITE)) ++ return -EROFS; ++ ++ lower_sb = unionfs_lower_super_idx(dentry->d_sb, index); ++ BUG_ON(lower_sb == NULL); ++ /* ++ * test sb flags directly, not IS_RDONLY(lower_inode) because the ++ * lower_dentry could be a negative. ++ */ ++ if (lower_sb->s_flags & MS_RDONLY) ++ return -EROFS; ++ ++ return 0; ++} ++ ++static inline int is_robranch(const struct dentry *dentry) ++{ ++ int index; ++ ++ index = UNIONFS_D(dentry)->bstart; ++ BUG_ON(index < 0); ++ ++ return is_robranch_idx(dentry, index); ++} ++ ++/* ++ * EXTERNALS: ++ */ ++extern int check_branch(struct nameidata *nd); ++extern int parse_branch_mode(const char *name, int *perms); ++ ++/* locking helpers */ ++static inline struct dentry *lock_parent(struct dentry *dentry) ++{ ++ struct dentry *dir = dget_parent(dentry); ++ mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); ++ return dir; ++} ++static inline struct dentry *lock_parent_wh(struct dentry *dentry) ++{ ++ struct dentry *dir = dget_parent(dentry); ++ ++ mutex_lock_nested(&dir->d_inode->i_mutex, UNIONFS_DMUTEX_WHITEOUT); ++ return dir; ++} ++ ++static inline void unlock_dir(struct dentry *dir) ++{ ++ mutex_unlock(&dir->d_inode->i_mutex); ++ dput(dir); ++} ++ ++/* lock base inode mutex before calling lookup_one_len */ ++static inline struct dentry *lookup_lck_len(const char *name, ++ struct dentry *base, int len) ++{ ++ struct dentry *d; ++ mutex_lock(&base->d_inode->i_mutex); ++ d = lookup_one_len(name, base, len); ++ mutex_unlock(&base->d_inode->i_mutex); ++ return d; ++} ++ ++static inline struct vfsmount *unionfs_mntget(struct dentry *dentry, ++ int bindex) ++{ ++ struct vfsmount *mnt; ++ ++ BUG_ON(!dentry || bindex < 0); ++ ++ mnt = mntget(unionfs_lower_mnt_idx(dentry, bindex)); ++#ifdef CONFIG_UNION_FS_DEBUG ++ if (!mnt) ++ pr_debug("unionfs: mntget: mnt=%p bindex=%d\n", ++ mnt, bindex); ++#endif /* CONFIG_UNION_FS_DEBUG */ ++ ++ return mnt; ++} ++ ++static inline void unionfs_mntput(struct dentry *dentry, int bindex) ++{ ++ struct vfsmount *mnt; ++ ++ if (!dentry && bindex < 0) ++ return; ++ BUG_ON(!dentry || bindex < 0); ++ ++ mnt = unionfs_lower_mnt_idx(dentry, bindex); ++#ifdef CONFIG_UNION_FS_DEBUG ++ /* ++ * Directories can have NULL lower objects in between start/end, but ++ * NOT if at the start/end range. We cannot verify that this dentry ++ * is a type=DIR, because it may already be a negative dentry. But ++ * if dbstart is greater than dbend, we know that this couldn't have ++ * been a regular file: it had to have been a directory. ++ */ ++ if (!mnt && !(bindex > dbstart(dentry) && bindex < dbend(dentry))) ++ pr_debug("unionfs: mntput: mnt=%p bindex=%d\n", mnt, bindex); ++#endif /* CONFIG_UNION_FS_DEBUG */ ++ mntput(mnt); ++} ++ ++#ifdef CONFIG_UNION_FS_DEBUG ++ ++/* useful for tracking code reachability */ ++#define UDBG pr_debug("DBG:%s:%s:%d\n", __FILE__, __func__, __LINE__) ++ ++#define unionfs_check_inode(i) __unionfs_check_inode((i), \ ++ __FILE__, __func__, __LINE__) ++#define unionfs_check_dentry(d) __unionfs_check_dentry((d), \ ++ __FILE__, __func__, __LINE__) ++#define unionfs_check_file(f) __unionfs_check_file((f), \ ++ __FILE__, __func__, __LINE__) ++#define unionfs_check_nd(n) __unionfs_check_nd((n), \ ++ __FILE__, __func__, __LINE__) ++#define show_branch_counts(sb) __show_branch_counts((sb), \ ++ __FILE__, __func__, __LINE__) ++#define show_inode_times(i) __show_inode_times((i), \ ++ __FILE__, __func__, __LINE__) ++#define show_dinode_times(d) __show_dinode_times((d), \ ++ __FILE__, __func__, __LINE__) ++#define show_inode_counts(i) __show_inode_counts((i), \ ++ __FILE__, __func__, __LINE__) ++ ++extern void __unionfs_check_inode(const struct inode *inode, const char *fname, ++ const char *fxn, int line); ++extern void __unionfs_check_dentry(const struct dentry *dentry, ++ const char *fname, const char *fxn, ++ int line); ++extern void __unionfs_check_file(const struct file *file, ++ const char *fname, const char *fxn, int line); ++extern void __unionfs_check_nd(const struct nameidata *nd, ++ const char *fname, const char *fxn, int line); ++extern void __show_branch_counts(const struct super_block *sb, ++ const char *file, const char *fxn, int line); ++extern void __show_inode_times(const struct inode *inode, ++ const char *file, const char *fxn, int line); ++extern void __show_dinode_times(const struct dentry *dentry, ++ const char *file, const char *fxn, int line); ++extern void __show_inode_counts(const struct inode *inode, ++ const char *file, const char *fxn, int line); ++ ++#else /* not CONFIG_UNION_FS_DEBUG */ ++ ++/* we leave useful hooks for these check functions throughout the code */ ++#define unionfs_check_inode(i) do { } while (0) ++#define unionfs_check_dentry(d) do { } while (0) ++#define unionfs_check_file(f) do { } while (0) ++#define unionfs_check_nd(n) do { } while (0) ++#define show_branch_counts(sb) do { } while (0) ++#define show_inode_times(i) do { } while (0) ++#define show_dinode_times(d) do { } while (0) ++#define show_inode_counts(i) do { } while (0) ++ ++#endif /* not CONFIG_UNION_FS_DEBUG */ ++ ++#endif /* not _UNION_H_ */ +diff --git a/fs/unionfs/unlink.c b/fs/unionfs/unlink.c +new file mode 100644 +index 0000000..542c513 +--- /dev/null ++++ b/fs/unionfs/unlink.c +@@ -0,0 +1,278 @@ ++/* ++ * Copyright (c) 2003-2010 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2010 Stony Brook University ++ * Copyright (c) 2003-2010 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++ ++/* ++ * Helper function for Unionfs's unlink operation. ++ * ++ * The main goal of this function is to optimize the unlinking of non-dir ++ * objects in unionfs by deleting all possible lower inode objects from the ++ * underlying branches having same dentry name as the non-dir dentry on ++ * which this unlink operation is called. This way we delete as many lower ++ * inodes as possible, and save space. Whiteouts need to be created in ++ * branch0 only if unlinking fails on any of the lower branch other than ++ * branch0, or if a lower branch is marked read-only. ++ * ++ * Also, while unlinking a file, if we encounter any dir type entry in any ++ * intermediate branch, then we remove the directory by calling vfs_rmdir. ++ * The following special cases are also handled: ++ ++ * (1) If an error occurs in branch0 during vfs_unlink, then we return ++ * appropriate error. ++ * ++ * (2) If we get an error during unlink in any of other lower branch other ++ * than branch0, then we create a whiteout in branch0. ++ * ++ * (3) If a whiteout already exists in any intermediate branch, we delete ++ * all possible inodes only up to that branch (this is an "opaqueness" ++ * as as per Documentation/filesystems/unionfs/concepts.txt). ++ * ++ */ ++static int unionfs_unlink_whiteout(struct inode *dir, struct dentry *dentry, ++ struct dentry *parent) ++{ ++ struct dentry *lower_dentry; ++ struct dentry *lower_dir_dentry; ++ int bindex; ++ int err = 0; ++ ++ err = unionfs_partial_lookup(dentry, parent); ++ if (err) ++ goto out; ++ ++ /* trying to unlink all possible valid instances */ ++ for (bindex = dbstart(dentry); bindex <= dbend(dentry); bindex++) { ++ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ if (!lower_dentry || !lower_dentry->d_inode) ++ continue; ++ ++ lower_dir_dentry = lock_parent(lower_dentry); ++ ++ /* avoid destroying the lower inode if the object is in use */ ++ dget(lower_dentry); ++ err = is_robranch_super(dentry->d_sb, bindex); ++ if (!err) { ++ /* see Documentation/filesystems/unionfs/issues.txt */ ++ lockdep_off(); ++ if (!S_ISDIR(lower_dentry->d_inode->i_mode)) ++ err = vfs_unlink(lower_dir_dentry->d_inode, ++ lower_dentry); ++ else ++ err = vfs_rmdir(lower_dir_dentry->d_inode, ++ lower_dentry); ++ lockdep_on(); ++ } ++ ++ /* if lower object deletion succeeds, update inode's times */ ++ if (!err) ++ unionfs_copy_attr_times(dentry->d_inode); ++ dput(lower_dentry); ++ fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode); ++ unlock_dir(lower_dir_dentry); ++ ++ if (err) ++ break; ++ } ++ ++ /* ++ * Create the whiteout in branch 0 (highest priority) only if (a) ++ * there was an error in any intermediate branch other than branch 0 ++ * due to failure of vfs_unlink/vfs_rmdir or (b) a branch marked or ++ * mounted read-only. ++ */ ++ if (err) { ++ if ((bindex == 0) || ++ ((bindex == dbstart(dentry)) && ++ (!IS_COPYUP_ERR(err)))) ++ goto out; ++ else { ++ if (!IS_COPYUP_ERR(err)) ++ pr_debug("unionfs: lower object deletion " ++ "failed in branch:%d\n", bindex); ++ err = create_whiteout(dentry, sbstart(dentry->d_sb)); ++ } ++ } ++ ++out: ++ if (!err) ++ inode_dec_link_count(dentry->d_inode); ++ ++ /* We don't want to leave negative leftover dentries for revalidate. */ ++ if (!err && (dbopaque(dentry) != -1)) ++ update_bstart(dentry); ++ ++ return err; ++} ++ ++int unionfs_unlink(struct inode *dir, struct dentry *dentry) ++{ ++ int err = 0; ++ struct inode *inode = dentry->d_inode; ++ struct dentry *parent; ++ int valid; ++ ++ BUG_ON(S_ISDIR(inode->i_mode)); ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD); ++ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ valid = __unionfs_d_revalidate(dentry, parent, false); ++ if (unlikely(!valid)) { ++ err = -ESTALE; ++ goto out; ++ } ++ unionfs_check_dentry(dentry); ++ ++ err = unionfs_unlink_whiteout(dir, dentry, parent); ++ /* call d_drop so the system "forgets" about us */ ++ if (!err) { ++ unionfs_postcopyup_release(dentry); ++ unionfs_postcopyup_setmnt(parent); ++ if (inode->i_nlink == 0) /* drop lower inodes */ ++ iput_lowers_all(inode, false); ++ d_drop(dentry); ++ /* ++ * if unlink/whiteout succeeded, parent dir mtime has ++ * changed ++ */ ++ unionfs_copy_attr_times(dir); ++ } ++ ++out: ++ if (!err) { ++ unionfs_check_dentry(dentry); ++ unionfs_check_inode(dir); ++ } ++ unionfs_unlock_dentry(dentry); ++ unionfs_unlock_parent(dentry, parent); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} ++ ++static int unionfs_rmdir_first(struct inode *dir, struct dentry *dentry, ++ struct unionfs_dir_state *namelist) ++{ ++ int err; ++ struct dentry *lower_dentry; ++ struct dentry *lower_dir_dentry = NULL; ++ ++ /* Here we need to remove whiteout entries. */ ++ err = delete_whiteouts(dentry, dbstart(dentry), namelist); ++ if (err) ++ goto out; ++ ++ lower_dentry = unionfs_lower_dentry(dentry); ++ ++ lower_dir_dentry = lock_parent(lower_dentry); ++ ++ /* avoid destroying the lower inode if the file is in use */ ++ dget(lower_dentry); ++ err = is_robranch(dentry); ++ if (!err) ++ err = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry); ++ dput(lower_dentry); ++ ++ fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode); ++ /* propagate number of hard-links */ ++ dentry->d_inode->i_nlink = unionfs_get_nlinks(dentry->d_inode); ++ ++out: ++ if (lower_dir_dentry) ++ unlock_dir(lower_dir_dentry); ++ return err; ++} ++ ++int unionfs_rmdir(struct inode *dir, struct dentry *dentry) ++{ ++ int err = 0; ++ struct unionfs_dir_state *namelist = NULL; ++ struct dentry *parent; ++ int dstart, dend; ++ bool valid; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD); ++ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ valid = __unionfs_d_revalidate(dentry, parent, false); ++ if (unlikely(!valid)) { ++ err = -ESTALE; ++ goto out; ++ } ++ unionfs_check_dentry(dentry); ++ ++ /* check if this unionfs directory is empty or not */ ++ err = check_empty(dentry, parent, &namelist); ++ if (err) ++ goto out; ++ ++ err = unionfs_rmdir_first(dir, dentry, namelist); ++ dstart = dbstart(dentry); ++ dend = dbend(dentry); ++ /* ++ * We create a whiteout for the directory if there was an error to ++ * rmdir the first directory entry in the union. Otherwise, we ++ * create a whiteout only if there is no chance that a lower ++ * priority branch might also have the same named directory. IOW, ++ * if there is not another same-named directory at a lower priority ++ * branch, then we don't need to create a whiteout for it. ++ */ ++ if (!err) { ++ if (dstart < dend) ++ err = create_whiteout(dentry, dstart); ++ } else { ++ int new_err; ++ ++ if (dstart == 0) ++ goto out; ++ ++ /* exit if the error returned was NOT -EROFS */ ++ if (!IS_COPYUP_ERR(err)) ++ goto out; ++ ++ new_err = create_whiteout(dentry, dstart - 1); ++ if (new_err != -EEXIST) ++ err = new_err; ++ } ++ ++out: ++ /* ++ * Drop references to lower dentry/inode so storage space for them ++ * can be reclaimed. Then, call d_drop so the system "forgets" ++ * about us. ++ */ ++ if (!err) { ++ iput_lowers_all(dentry->d_inode, false); ++ dput(unionfs_lower_dentry_idx(dentry, dstart)); ++ unionfs_set_lower_dentry_idx(dentry, dstart, NULL); ++ d_drop(dentry); ++ /* update our lower vfsmnts, in case a copyup took place */ ++ unionfs_postcopyup_setmnt(dentry); ++ unionfs_check_dentry(dentry); ++ unionfs_check_inode(dir); ++ } ++ ++ if (namelist) ++ free_rdstate(namelist); ++ ++ unionfs_unlock_dentry(dentry); ++ unionfs_unlock_parent(dentry, parent); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} +diff --git a/fs/unionfs/whiteout.c b/fs/unionfs/whiteout.c +new file mode 100644 +index 0000000..405073a +--- /dev/null ++++ b/fs/unionfs/whiteout.c +@@ -0,0 +1,584 @@ ++/* ++ * Copyright (c) 2003-2010 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2010 Stony Brook University ++ * Copyright (c) 2003-2010 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++ ++/* ++ * whiteout and opaque directory helpers ++ */ ++ ++/* What do we use for whiteouts. */ ++#define UNIONFS_WHPFX ".wh." ++#define UNIONFS_WHLEN 4 ++/* ++ * If a directory contains this file, then it is opaque. We start with the ++ * .wh. flag so that it is blocked by lookup. ++ */ ++#define UNIONFS_DIR_OPAQUE_NAME "__dir_opaque" ++#define UNIONFS_DIR_OPAQUE UNIONFS_WHPFX UNIONFS_DIR_OPAQUE_NAME ++ ++/* construct whiteout filename */ ++char *alloc_whname(const char *name, int len) ++{ ++ char *buf; ++ ++ buf = kmalloc(len + UNIONFS_WHLEN + 1, GFP_KERNEL); ++ if (unlikely(!buf)) ++ return ERR_PTR(-ENOMEM); ++ ++ strcpy(buf, UNIONFS_WHPFX); ++ strlcat(buf, name, len + UNIONFS_WHLEN + 1); ++ ++ return buf; ++} ++ ++/* ++ * XXX: this can be inline or CPP macro, but is here to keep all whiteout ++ * code in one place. ++ */ ++void unionfs_set_max_namelen(long *namelen) ++{ ++ *namelen -= UNIONFS_WHLEN; ++} ++ ++/* check if @namep is a whiteout, update @namep and @namelenp accordingly */ ++bool is_whiteout_name(char **namep, int *namelenp) ++{ ++ if (*namelenp > UNIONFS_WHLEN && ++ !strncmp(*namep, UNIONFS_WHPFX, UNIONFS_WHLEN)) { ++ *namep += UNIONFS_WHLEN; ++ *namelenp -= UNIONFS_WHLEN; ++ return true; ++ } ++ return false; ++} ++ ++/* is the filename valid == !(whiteout for a file or opaque dir marker) */ ++bool is_validname(const char *name) ++{ ++ if (!strncmp(name, UNIONFS_WHPFX, UNIONFS_WHLEN)) ++ return false; ++ if (!strncmp(name, UNIONFS_DIR_OPAQUE_NAME, ++ sizeof(UNIONFS_DIR_OPAQUE_NAME) - 1)) ++ return false; ++ return true; ++} ++ ++/* ++ * Look for a whiteout @name in @lower_parent directory. If error, return ++ * ERR_PTR. Caller must dput() the returned dentry if not an error. ++ * ++ * XXX: some callers can reuse the whname allocated buffer to avoid repeated ++ * free then re-malloc calls. Need to provide a different API for those ++ * callers. ++ */ ++struct dentry *lookup_whiteout(const char *name, struct dentry *lower_parent) ++{ ++ char *whname = NULL; ++ int err = 0, namelen; ++ struct dentry *wh_dentry = NULL; ++ ++ namelen = strlen(name); ++ whname = alloc_whname(name, namelen); ++ if (unlikely(IS_ERR(whname))) { ++ err = PTR_ERR(whname); ++ goto out; ++ } ++ ++ /* check if whiteout exists in this branch: lookup .wh.foo */ ++ wh_dentry = lookup_lck_len(whname, lower_parent, strlen(whname)); ++ if (IS_ERR(wh_dentry)) { ++ err = PTR_ERR(wh_dentry); ++ goto out; ++ } ++ ++ /* check if negative dentry (ENOENT) */ ++ if (!wh_dentry->d_inode) ++ goto out; ++ ++ /* whiteout found: check if valid type */ ++ if (!S_ISREG(wh_dentry->d_inode->i_mode)) { ++ printk(KERN_ERR "unionfs: invalid whiteout %s entry type %d\n", ++ whname, wh_dentry->d_inode->i_mode); ++ dput(wh_dentry); ++ err = -EIO; ++ goto out; ++ } ++ ++out: ++ kfree(whname); ++ if (err) ++ wh_dentry = ERR_PTR(err); ++ return wh_dentry; ++} ++ ++/* find and return first whiteout in parent directory, else ENOENT */ ++struct dentry *find_first_whiteout(struct dentry *dentry) ++{ ++ int bindex, bstart, bend; ++ struct dentry *parent, *lower_parent, *wh_dentry; ++ ++ parent = dget_parent(dentry); ++ ++ bstart = dbstart(parent); ++ bend = dbend(parent); ++ wh_dentry = ERR_PTR(-ENOENT); ++ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ lower_parent = unionfs_lower_dentry_idx(parent, bindex); ++ if (!lower_parent) ++ continue; ++ wh_dentry = lookup_whiteout(dentry->d_name.name, lower_parent); ++ if (IS_ERR(wh_dentry)) ++ continue; ++ if (wh_dentry->d_inode) ++ break; ++ dput(wh_dentry); ++ wh_dentry = ERR_PTR(-ENOENT); ++ } ++ ++ dput(parent); ++ ++ return wh_dentry; ++} ++ ++/* ++ * Unlink a whiteout dentry. Returns 0 or -errno. Caller must hold and ++ * release dentry reference. ++ */ ++int unlink_whiteout(struct dentry *wh_dentry) ++{ ++ int err; ++ struct dentry *lower_dir_dentry; ++ ++ /* dget and lock parent dentry */ ++ lower_dir_dentry = lock_parent_wh(wh_dentry); ++ ++ /* see Documentation/filesystems/unionfs/issues.txt */ ++ lockdep_off(); ++ err = vfs_unlink(lower_dir_dentry->d_inode, wh_dentry); ++ lockdep_on(); ++ unlock_dir(lower_dir_dentry); ++ ++ /* ++ * Whiteouts are special files and should be deleted no matter what ++ * (as if they never existed), in order to allow this create ++ * operation to succeed. This is especially important in sticky ++ * directories: a whiteout may have been created by one user, but ++ * the newly created file may be created by another user. ++ * Therefore, in order to maintain Unix semantics, if the vfs_unlink ++ * above failed, then we have to try to directly unlink the ++ * whiteout. Note: in the ODF version of unionfs, whiteout are ++ * handled much more cleanly. ++ */ ++ if (err == -EPERM) { ++ struct inode *inode = lower_dir_dentry->d_inode; ++ err = inode->i_op->unlink(inode, wh_dentry); ++ } ++ if (err) ++ printk(KERN_ERR "unionfs: could not unlink whiteout %s, " ++ "err = %d\n", wh_dentry->d_name.name, err); ++ ++ return err; ++ ++} ++ ++/* ++ * Helper function when creating new objects (create, symlink, mknod, etc.). ++ * Checks to see if there's a whiteout in @lower_dentry's parent directory, ++ * whose name is taken from @dentry. Then tries to remove that whiteout, if ++ * found. If <dentry,bindex> is a branch marked readonly, return -EROFS. ++ * If it finds both a regular file and a whiteout, return -EIO (this should ++ * never happen). ++ * ++ * Return 0 if no whiteout was found. Return 1 if one was found and ++ * successfully removed. Therefore a value >= 0 tells the caller that ++ * @lower_dentry belongs to a good branch to create the new object in). ++ * Return -ERRNO if an error occurred during whiteout lookup or in trying to ++ * unlink the whiteout. ++ */ ++int check_unlink_whiteout(struct dentry *dentry, struct dentry *lower_dentry, ++ int bindex) ++{ ++ int err; ++ struct dentry *wh_dentry = NULL; ++ struct dentry *lower_dir_dentry = NULL; ++ ++ /* look for whiteout dentry first */ ++ lower_dir_dentry = dget_parent(lower_dentry); ++ wh_dentry = lookup_whiteout(dentry->d_name.name, lower_dir_dentry); ++ dput(lower_dir_dentry); ++ if (IS_ERR(wh_dentry)) { ++ err = PTR_ERR(wh_dentry); ++ goto out; ++ } ++ ++ if (!wh_dentry->d_inode) { /* no whiteout exists*/ ++ err = 0; ++ goto out_dput; ++ } ++ ++ /* check if regular file and whiteout were both found */ ++ if (unlikely(lower_dentry->d_inode)) { ++ err = -EIO; ++ printk(KERN_ERR "unionfs: found both whiteout and regular " ++ "file in directory %s (branch %d)\n", ++ lower_dir_dentry->d_name.name, bindex); ++ goto out_dput; ++ } ++ ++ /* check if branch is writeable */ ++ err = is_robranch_super(dentry->d_sb, bindex); ++ if (err) ++ goto out_dput; ++ ++ /* .wh.foo has been found, so let's unlink it */ ++ err = unlink_whiteout(wh_dentry); ++ if (!err) ++ err = 1; /* a whiteout was found and successfully removed */ ++out_dput: ++ dput(wh_dentry); ++out: ++ return err; ++} ++ ++/* ++ * Pass an unionfs dentry and an index. It will try to create a whiteout ++ * for the filename in dentry, and will try in branch 'index'. On error, ++ * it will proceed to a branch to the left. ++ */ ++int create_whiteout(struct dentry *dentry, int start) ++{ ++ int bstart, bend, bindex; ++ struct dentry *lower_dir_dentry; ++ struct dentry *lower_dentry; ++ struct dentry *lower_wh_dentry; ++ struct nameidata nd; ++ char *name = NULL; ++ int err = -EINVAL; ++ ++ verify_locked(dentry); ++ ++ bstart = dbstart(dentry); ++ bend = dbend(dentry); ++ ++ /* create dentry's whiteout equivalent */ ++ name = alloc_whname(dentry->d_name.name, dentry->d_name.len); ++ if (unlikely(IS_ERR(name))) { ++ err = PTR_ERR(name); ++ goto out; ++ } ++ ++ for (bindex = start; bindex >= 0; bindex--) { ++ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ ++ if (!lower_dentry) { ++ /* ++ * if lower dentry is not present, create the ++ * entire lower dentry directory structure and go ++ * ahead. Since we want to just create whiteout, we ++ * only want the parent dentry, and hence get rid of ++ * this dentry. ++ */ ++ lower_dentry = create_parents(dentry->d_inode, ++ dentry, ++ dentry->d_name.name, ++ bindex); ++ if (!lower_dentry || IS_ERR(lower_dentry)) { ++ int ret = PTR_ERR(lower_dentry); ++ if (!IS_COPYUP_ERR(ret)) ++ printk(KERN_ERR ++ "unionfs: create_parents for " ++ "whiteout failed: bindex=%d " ++ "err=%d\n", bindex, ret); ++ continue; ++ } ++ } ++ ++ lower_wh_dentry = ++ lookup_lck_len(name, lower_dentry->d_parent, ++ dentry->d_name.len + UNIONFS_WHLEN); ++ if (IS_ERR(lower_wh_dentry)) ++ continue; ++ ++ /* ++ * The whiteout already exists. This used to be impossible, ++ * but now is possible because of opaqueness. ++ */ ++ if (lower_wh_dentry->d_inode) { ++ dput(lower_wh_dentry); ++ err = 0; ++ goto out; ++ } ++ ++ err = init_lower_nd(&nd, LOOKUP_CREATE); ++ if (unlikely(err < 0)) ++ goto out; ++ lower_dir_dentry = lock_parent_wh(lower_wh_dentry); ++ err = is_robranch_super(dentry->d_sb, bindex); ++ if (!err) ++ err = vfs_create(lower_dir_dentry->d_inode, ++ lower_wh_dentry, ++ current_umask() & S_IRUGO, ++ &nd); ++ unlock_dir(lower_dir_dentry); ++ dput(lower_wh_dentry); ++ release_lower_nd(&nd, err); ++ ++ if (!err || !IS_COPYUP_ERR(err)) ++ break; ++ } ++ ++ /* set dbopaque so that lookup will not proceed after this branch */ ++ if (!err) ++ dbopaque(dentry) = bindex; ++ ++out: ++ kfree(name); ++ return err; ++} ++ ++/* ++ * Delete all of the whiteouts in a given directory for rmdir. ++ * ++ * lower directory inode should be locked ++ */ ++static int do_delete_whiteouts(struct dentry *dentry, int bindex, ++ struct unionfs_dir_state *namelist) ++{ ++ int err = 0; ++ struct dentry *lower_dir_dentry = NULL; ++ struct dentry *lower_dentry; ++ char *name = NULL, *p; ++ struct inode *lower_dir; ++ int i; ++ struct list_head *pos; ++ struct filldir_node *cursor; ++ ++ /* Find out lower parent dentry */ ++ lower_dir_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ BUG_ON(!S_ISDIR(lower_dir_dentry->d_inode->i_mode)); ++ lower_dir = lower_dir_dentry->d_inode; ++ BUG_ON(!S_ISDIR(lower_dir->i_mode)); ++ ++ err = -ENOMEM; ++ name = __getname(); ++ if (unlikely(!name)) ++ goto out; ++ strcpy(name, UNIONFS_WHPFX); ++ p = name + UNIONFS_WHLEN; ++ ++ err = 0; ++ for (i = 0; !err && i < namelist->size; i++) { ++ list_for_each(pos, &namelist->list[i]) { ++ cursor = ++ list_entry(pos, struct filldir_node, ++ file_list); ++ /* Only operate on whiteouts in this branch. */ ++ if (cursor->bindex != bindex) ++ continue; ++ if (!cursor->whiteout) ++ continue; ++ ++ strlcpy(p, cursor->name, PATH_MAX - UNIONFS_WHLEN); ++ lower_dentry = ++ lookup_lck_len(name, lower_dir_dentry, ++ cursor->namelen + ++ UNIONFS_WHLEN); ++ if (IS_ERR(lower_dentry)) { ++ err = PTR_ERR(lower_dentry); ++ break; ++ } ++ if (lower_dentry->d_inode) ++ err = vfs_unlink(lower_dir, lower_dentry); ++ dput(lower_dentry); ++ if (err) ++ break; ++ } ++ } ++ ++ __putname(name); ++ ++ /* After all of the removals, we should copy the attributes once. */ ++ fsstack_copy_attr_times(dentry->d_inode, lower_dir_dentry->d_inode); ++ ++out: ++ return err; ++} ++ ++ ++void __delete_whiteouts(struct work_struct *work) ++{ ++ struct sioq_args *args = container_of(work, struct sioq_args, work); ++ struct deletewh_args *d = &args->deletewh; ++ ++ args->err = do_delete_whiteouts(d->dentry, d->bindex, d->namelist); ++ complete(&args->comp); ++} ++ ++/* delete whiteouts in a dir (for rmdir operation) using sioq if necessary */ ++int delete_whiteouts(struct dentry *dentry, int bindex, ++ struct unionfs_dir_state *namelist) ++{ ++ int err; ++ struct super_block *sb; ++ struct dentry *lower_dir_dentry; ++ struct inode *lower_dir; ++ struct sioq_args args; ++ ++ sb = dentry->d_sb; ++ ++ BUG_ON(!S_ISDIR(dentry->d_inode->i_mode)); ++ BUG_ON(bindex < dbstart(dentry)); ++ BUG_ON(bindex > dbend(dentry)); ++ err = is_robranch_super(sb, bindex); ++ if (err) ++ goto out; ++ ++ lower_dir_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ BUG_ON(!S_ISDIR(lower_dir_dentry->d_inode->i_mode)); ++ lower_dir = lower_dir_dentry->d_inode; ++ BUG_ON(!S_ISDIR(lower_dir->i_mode)); ++ ++ if (!inode_permission(lower_dir, MAY_WRITE | MAY_EXEC)) { ++ err = do_delete_whiteouts(dentry, bindex, namelist); ++ } else { ++ args.deletewh.namelist = namelist; ++ args.deletewh.dentry = dentry; ++ args.deletewh.bindex = bindex; ++ run_sioq(__delete_whiteouts, &args); ++ err = args.err; ++ } ++ ++out: ++ return err; ++} ++ ++/**************************************************************************** ++ * Opaque directory helpers * ++ ****************************************************************************/ ++ ++/* ++ * is_opaque_dir: returns 0 if it is NOT an opaque dir, 1 if it is, and ++ * -errno if an error occurred trying to figure this out. ++ */ ++int is_opaque_dir(struct dentry *dentry, int bindex) ++{ ++ int err = 0; ++ struct dentry *lower_dentry; ++ struct dentry *wh_lower_dentry; ++ struct inode *lower_inode; ++ struct sioq_args args; ++ ++ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ lower_inode = lower_dentry->d_inode; ++ ++ BUG_ON(!S_ISDIR(lower_inode->i_mode)); ++ ++ mutex_lock(&lower_inode->i_mutex); ++ ++ if (!inode_permission(lower_inode, MAY_EXEC)) { ++ wh_lower_dentry = ++ lookup_one_len(UNIONFS_DIR_OPAQUE, lower_dentry, ++ sizeof(UNIONFS_DIR_OPAQUE) - 1); ++ } else { ++ args.is_opaque.dentry = lower_dentry; ++ run_sioq(__is_opaque_dir, &args); ++ wh_lower_dentry = args.ret; ++ } ++ ++ mutex_unlock(&lower_inode->i_mutex); ++ ++ if (IS_ERR(wh_lower_dentry)) { ++ err = PTR_ERR(wh_lower_dentry); ++ goto out; ++ } ++ ++ /* This is an opaque dir iff wh_lower_dentry is positive */ ++ err = !!wh_lower_dentry->d_inode; ++ ++ dput(wh_lower_dentry); ++out: ++ return err; ++} ++ ++void __is_opaque_dir(struct work_struct *work) ++{ ++ struct sioq_args *args = container_of(work, struct sioq_args, work); ++ ++ args->ret = lookup_one_len(UNIONFS_DIR_OPAQUE, args->is_opaque.dentry, ++ sizeof(UNIONFS_DIR_OPAQUE) - 1); ++ complete(&args->comp); ++} ++ ++int make_dir_opaque(struct dentry *dentry, int bindex) ++{ ++ int err = 0; ++ struct dentry *lower_dentry, *diropq; ++ struct inode *lower_dir; ++ struct nameidata nd; ++ const struct cred *old_creds; ++ struct cred *new_creds; ++ ++ /* ++ * Opaque directory whiteout markers are special files (like regular ++ * whiteouts), and should appear to the users as if they don't ++ * exist. They should be created/deleted regardless of directory ++ * search/create permissions, but only for the duration of this ++ * creation of the .wh.__dir_opaque: file. Note, this does not ++ * circumvent normal ->permission). ++ */ ++ new_creds = prepare_creds(); ++ if (unlikely(!new_creds)) { ++ err = -ENOMEM; ++ goto out_err; ++ } ++ cap_raise(new_creds->cap_effective, CAP_DAC_READ_SEARCH); ++ cap_raise(new_creds->cap_effective, CAP_DAC_OVERRIDE); ++ old_creds = override_creds(new_creds); ++ ++ lower_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ lower_dir = lower_dentry->d_inode; ++ BUG_ON(!S_ISDIR(dentry->d_inode->i_mode) || ++ !S_ISDIR(lower_dir->i_mode)); ++ ++ mutex_lock(&lower_dir->i_mutex); ++ diropq = lookup_one_len(UNIONFS_DIR_OPAQUE, lower_dentry, ++ sizeof(UNIONFS_DIR_OPAQUE) - 1); ++ if (IS_ERR(diropq)) { ++ err = PTR_ERR(diropq); ++ goto out; ++ } ++ ++ err = init_lower_nd(&nd, LOOKUP_CREATE); ++ if (unlikely(err < 0)) ++ goto out; ++ if (!diropq->d_inode) ++ err = vfs_create(lower_dir, diropq, S_IRUGO, &nd); ++ if (!err) ++ dbopaque(dentry) = bindex; ++ release_lower_nd(&nd, err); ++ ++ dput(diropq); ++ ++out: ++ mutex_unlock(&lower_dir->i_mutex); ++ revert_creds(old_creds); ++out_err: ++ return err; ++} +diff --git a/fs/unionfs/xattr.c b/fs/unionfs/xattr.c +new file mode 100644 +index 0000000..9002e06 +--- /dev/null ++++ b/fs/unionfs/xattr.c +@@ -0,0 +1,173 @@ ++/* ++ * Copyright (c) 2003-2010 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2010 Stony Brook University ++ * Copyright (c) 2003-2010 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++ ++/* This is lifted from fs/xattr.c */ ++void *unionfs_xattr_alloc(size_t size, size_t limit) ++{ ++ void *ptr; ++ ++ if (size > limit) ++ return ERR_PTR(-E2BIG); ++ ++ if (!size) /* size request, no buffer is needed */ ++ return NULL; ++ ++ ptr = kmalloc(size, GFP_KERNEL); ++ if (unlikely(!ptr)) ++ return ERR_PTR(-ENOMEM); ++ return ptr; ++} ++ ++/* ++ * BKL held by caller. ++ * dentry->d_inode->i_mutex locked ++ */ ++ssize_t unionfs_getxattr(struct dentry *dentry, const char *name, void *value, ++ size_t size) ++{ ++ struct dentry *lower_dentry = NULL; ++ struct dentry *parent; ++ int err = -EOPNOTSUPP; ++ bool valid; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD); ++ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ valid = __unionfs_d_revalidate(dentry, parent, false); ++ if (unlikely(!valid)) { ++ err = -ESTALE; ++ goto out; ++ } ++ ++ lower_dentry = unionfs_lower_dentry(dentry); ++ ++ err = vfs_getxattr(lower_dentry, (char *) name, value, size); ++ ++out: ++ unionfs_check_dentry(dentry); ++ unionfs_unlock_dentry(dentry); ++ unionfs_unlock_parent(dentry, parent); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} ++ ++/* ++ * BKL held by caller. ++ * dentry->d_inode->i_mutex locked ++ */ ++int unionfs_setxattr(struct dentry *dentry, const char *name, ++ const void *value, size_t size, int flags) ++{ ++ struct dentry *lower_dentry = NULL; ++ struct dentry *parent; ++ int err = -EOPNOTSUPP; ++ bool valid; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD); ++ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ valid = __unionfs_d_revalidate(dentry, parent, false); ++ if (unlikely(!valid)) { ++ err = -ESTALE; ++ goto out; ++ } ++ ++ lower_dentry = unionfs_lower_dentry(dentry); ++ ++ err = vfs_setxattr(lower_dentry, (char *) name, (void *) value, ++ size, flags); ++ ++out: ++ unionfs_check_dentry(dentry); ++ unionfs_unlock_dentry(dentry); ++ unionfs_unlock_parent(dentry, parent); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} ++ ++/* ++ * BKL held by caller. ++ * dentry->d_inode->i_mutex locked ++ */ ++int unionfs_removexattr(struct dentry *dentry, const char *name) ++{ ++ struct dentry *lower_dentry = NULL; ++ struct dentry *parent; ++ int err = -EOPNOTSUPP; ++ bool valid; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD); ++ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ valid = __unionfs_d_revalidate(dentry, parent, false); ++ if (unlikely(!valid)) { ++ err = -ESTALE; ++ goto out; ++ } ++ ++ lower_dentry = unionfs_lower_dentry(dentry); ++ ++ err = vfs_removexattr(lower_dentry, (char *) name); ++ ++out: ++ unionfs_check_dentry(dentry); ++ unionfs_unlock_dentry(dentry); ++ unionfs_unlock_parent(dentry, parent); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} ++ ++/* ++ * BKL held by caller. ++ * dentry->d_inode->i_mutex locked ++ */ ++ssize_t unionfs_listxattr(struct dentry *dentry, char *list, size_t size) ++{ ++ struct dentry *lower_dentry = NULL; ++ struct dentry *parent; ++ int err = -EOPNOTSUPP; ++ char *encoded_list = NULL; ++ bool valid; ++ ++ unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD); ++ parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT); ++ unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); ++ ++ valid = __unionfs_d_revalidate(dentry, parent, false); ++ if (unlikely(!valid)) { ++ err = -ESTALE; ++ goto out; ++ } ++ ++ lower_dentry = unionfs_lower_dentry(dentry); ++ ++ encoded_list = list; ++ err = vfs_listxattr(lower_dentry, encoded_list, size); ++ ++out: ++ unionfs_check_dentry(dentry); ++ unionfs_unlock_dentry(dentry); ++ unionfs_unlock_parent(dentry, parent); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} +diff --git a/include/linux/fs_stack.h b/include/linux/fs_stack.h +index da317c7..64f1ced 100644 +--- a/include/linux/fs_stack.h ++++ b/include/linux/fs_stack.h +@@ -1,7 +1,19 @@ ++/* ++ * Copyright (c) 2006-2009 Erez Zadok ++ * Copyright (c) 2006-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2006-2009 Stony Brook University ++ * Copyright (c) 2006-2009 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ + #ifndef _LINUX_FS_STACK_H + #define _LINUX_FS_STACK_H + +-/* This file defines generic functions used primarily by stackable ++/* ++ * This file defines generic functions used primarily by stackable + * filesystems; none of these functions require i_mutex to be held. + */ + +diff --git a/include/linux/magic.h b/include/linux/magic.h +index eb9800f..9770154 100644 +--- a/include/linux/magic.h ++++ b/include/linux/magic.h +@@ -47,6 +47,8 @@ + #define REISER2FS_SUPER_MAGIC_STRING "ReIsEr2Fs" + #define REISER2FS_JR_SUPER_MAGIC_STRING "ReIsEr3Fs" + ++#define UNIONFS_SUPER_MAGIC 0xf15f083d ++ + #define SMB_SUPER_MAGIC 0x517B + #define USBDEVICE_SUPER_MAGIC 0x9fa2 + #define CGROUP_SUPER_MAGIC 0x27e0eb +diff --git a/include/linux/namei.h b/include/linux/namei.h +index 05b441d..dca6f9a 100644 +--- a/include/linux/namei.h ++++ b/include/linux/namei.h +@@ -72,6 +72,7 @@ extern int vfs_path_lookup(struct dentry *, struct vfsmount *, + + extern struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry, + int (*open)(struct inode *, struct file *)); ++extern void release_open_intent(struct nameidata *); + + extern struct dentry *lookup_one_len(const char *, struct dentry *, int); + +diff --git a/include/linux/splice.h b/include/linux/splice.h +index 997c3b4..54f5501 100644 +--- a/include/linux/splice.h ++++ b/include/linux/splice.h +@@ -81,6 +81,11 @@ extern ssize_t splice_to_pipe(struct pipe_inode_info *, + struct splice_pipe_desc *); + extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *, + splice_direct_actor *); ++extern long vfs_splice_from(struct pipe_inode_info *pipe, struct file *out, ++ loff_t *ppos, size_t len, unsigned int flags); ++extern long vfs_splice_to(struct file *in, loff_t *ppos, ++ struct pipe_inode_info *pipe, size_t len, ++ unsigned int flags); + + /* + * for dynamic pipe sizing +diff --git a/include/linux/union_fs.h b/include/linux/union_fs.h +new file mode 100644 +index 0000000..c84d97e +--- /dev/null ++++ b/include/linux/union_fs.h +@@ -0,0 +1,22 @@ ++/* ++ * Copyright (c) 2003-2009 Erez Zadok ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2003-2009 Stony Brook University ++ * Copyright (c) 2003-2009 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#ifndef _LINUX_UNION_FS_H ++#define _LINUX_UNION_FS_H ++ ++/* ++ * DEFINITIONS FOR USER AND KERNEL CODE: ++ */ ++# define UNIONFS_IOCTL_INCGEN _IOR(0x15, 11, int) ++# define UNIONFS_IOCTL_QUERYFILE _IOR(0x15, 15, int) ++ ++#endif /* _LINUX_UNIONFS_H */ ++ +diff --git a/security/security.c b/security/security.c +index c53949f..eb71394 100644 +--- a/security/security.c ++++ b/security/security.c +@@ -528,6 +528,7 @@ int security_inode_permission(struct inode *inode, int mask) + return 0; + return security_ops->inode_permission(inode, mask); + } ++EXPORT_SYMBOL(security_inode_permission); + + int security_inode_setattr(struct dentry *dentry, struct iattr *attr) + { |