From 3583a71183a02c51ca71cd180e9189cfb0411cc1 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Tue, 22 Jul 2008 20:21:23 +0300
Subject: make selinux_write_opts() static

This patch makes the needlessly global selinux_write_opts() static.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: James Morris <jmorris@namei.org>
---
 security/selinux/hooks.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'security')

diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 3ae9bec5a508..0ffd8814af3e 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -957,7 +957,8 @@ out_err:
 	return rc;
 }
 
-void selinux_write_opts(struct seq_file *m, struct security_mnt_opts *opts)
+static void selinux_write_opts(struct seq_file *m,
+			       struct security_mnt_opts *opts)
 {
 	int i;
 	char *prefix;
-- 
cgit v1.2.3


From df4ea865f09580b1cad621c0426612f598847815 Mon Sep 17 00:00:00 2001
From: Vesa-Matti J Kari <vmkari@cc.helsinki.fi>
Date: Sun, 20 Jul 2008 23:57:01 +0300
Subject: SELinux: Trivial minor fixes that change C null character style

Trivial minor fixes that change C null character style.

Signed-off-by: Vesa-Matti Kari <vmkari@cc.helsinki.fi>
Signed-off-by: James Morris <jmorris@namei.org>
---
 security/selinux/ss/conditional.c |  2 +-
 security/selinux/ss/mls.c         | 14 +++++++-------
 security/selinux/ss/policydb.c    | 20 ++++++++++----------
 3 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'security')

diff --git a/security/selinux/ss/conditional.c b/security/selinux/ss/conditional.c
index fb4efe4f4bc8..f8c850a56ed1 100644
--- a/security/selinux/ss/conditional.c
+++ b/security/selinux/ss/conditional.c
@@ -239,7 +239,7 @@ int cond_read_bool(struct policydb *p, struct hashtab *h, void *fp)
 	rc = next_entry(key, fp, len);
 	if (rc < 0)
 		goto err;
-	key[len] = 0;
+	key[len] = '\0';
 	if (hashtab_insert(h, key, booldatum))
 		goto err;
 
diff --git a/security/selinux/ss/mls.c b/security/selinux/ss/mls.c
index 77d745da48bb..b5407f16c2a4 100644
--- a/security/selinux/ss/mls.c
+++ b/security/selinux/ss/mls.c
@@ -283,8 +283,8 @@ int mls_context_to_sid(struct policydb *pol,
 		p++;
 
 	delim = *p;
-	if (delim != 0)
-		*p++ = 0;
+	if (delim != '\0')
+		*p++ = '\0';
 
 	for (l = 0; l < 2; l++) {
 		levdatum = hashtab_search(pol->p_levels.table, scontextp);
@@ -302,14 +302,14 @@ int mls_context_to_sid(struct policydb *pol,
 				while (*p && *p != ',' && *p != '-')
 					p++;
 				delim = *p;
-				if (delim != 0)
-					*p++ = 0;
+				if (delim != '\0')
+					*p++ = '\0';
 
 				/* Separate into range if exists */
 				rngptr = strchr(scontextp, '.');
 				if (rngptr != NULL) {
 					/* Remove '.' */
-					*rngptr++ = 0;
+					*rngptr++ = '\0';
 				}
 
 				catdatum = hashtab_search(pol->p_cats.table,
@@ -357,8 +357,8 @@ int mls_context_to_sid(struct policydb *pol,
 				p++;
 
 			delim = *p;
-			if (delim != 0)
-				*p++ = 0;
+			if (delim != '\0')
+				*p++ = '\0';
 		} else
 			break;
 	}
diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c
index 2391761ae422..26646305dc0e 100644
--- a/security/selinux/ss/policydb.c
+++ b/security/selinux/ss/policydb.c
@@ -932,7 +932,7 @@ static int perm_read(struct policydb *p, struct hashtab *h, void *fp)
 	rc = next_entry(key, fp, len);
 	if (rc < 0)
 		goto bad;
-	key[len] = 0;
+	key[len] = '\0';
 
 	rc = hashtab_insert(h, key, perdatum);
 	if (rc)
@@ -979,7 +979,7 @@ static int common_read(struct policydb *p, struct hashtab *h, void *fp)
 	rc = next_entry(key, fp, len);
 	if (rc < 0)
 		goto bad;
-	key[len] = 0;
+	key[len] = '\0';
 
 	for (i = 0; i < nel; i++) {
 		rc = perm_read(p, comdatum->permissions.table, fp);
@@ -1117,7 +1117,7 @@ static int class_read(struct policydb *p, struct hashtab *h, void *fp)
 	rc = next_entry(key, fp, len);
 	if (rc < 0)
 		goto bad;
-	key[len] = 0;
+	key[len] = '\0';
 
 	if (len2) {
 		cladatum->comkey = kmalloc(len2 + 1, GFP_KERNEL);
@@ -1128,7 +1128,7 @@ static int class_read(struct policydb *p, struct hashtab *h, void *fp)
 		rc = next_entry(cladatum->comkey, fp, len2);
 		if (rc < 0)
 			goto bad;
-		cladatum->comkey[len2] = 0;
+		cladatum->comkey[len2] = '\0';
 
 		cladatum->comdatum = hashtab_search(p->p_commons.table,
 						    cladatum->comkey);
@@ -1201,7 +1201,7 @@ static int role_read(struct policydb *p, struct hashtab *h, void *fp)
 	rc = next_entry(key, fp, len);
 	if (rc < 0)
 		goto bad;
-	key[len] = 0;
+	key[len] = '\0';
 
 	rc = ebitmap_read(&role->dominates, fp);
 	if (rc)
@@ -1262,7 +1262,7 @@ static int type_read(struct policydb *p, struct hashtab *h, void *fp)
 	rc = next_entry(key, fp, len);
 	if (rc < 0)
 		goto bad;
-	key[len] = 0;
+	key[len] = '\0';
 
 	rc = hashtab_insert(h, key, typdatum);
 	if (rc)
@@ -1334,7 +1334,7 @@ static int user_read(struct policydb *p, struct hashtab *h, void *fp)
 	rc = next_entry(key, fp, len);
 	if (rc < 0)
 		goto bad;
-	key[len] = 0;
+	key[len] = '\0';
 
 	rc = ebitmap_read(&usrdatum->roles, fp);
 	if (rc)
@@ -1388,7 +1388,7 @@ static int sens_read(struct policydb *p, struct hashtab *h, void *fp)
 	rc = next_entry(key, fp, len);
 	if (rc < 0)
 		goto bad;
-	key[len] = 0;
+	key[len] = '\0';
 
 	levdatum->level = kmalloc(sizeof(struct mls_level), GFP_ATOMIC);
 	if (!levdatum->level) {
@@ -1440,7 +1440,7 @@ static int cat_read(struct policydb *p, struct hashtab *h, void *fp)
 	rc = next_entry(key, fp, len);
 	if (rc < 0)
 		goto bad;
-	key[len] = 0;
+	key[len] = '\0';
 
 	rc = hashtab_insert(h, key, catdatum);
 	if (rc)
@@ -1523,7 +1523,7 @@ int policydb_read(struct policydb *p, void *fp)
 		kfree(policydb_str);
 		goto bad;
 	}
-	policydb_str[len] = 0;
+	policydb_str[len] = '\0';
 	if (strcmp(policydb_str, POLICYDB_STRING)) {
 		printk(KERN_ERR "SELinux:  policydb string %s does not match "
 		       "my string %s\n", policydb_str, POLICYDB_STRING);
-- 
cgit v1.2.3


From 0c0e186f812457e527c420f7a4d02865fd0dc7d2 Mon Sep 17 00:00:00 2001
From: Vesa-Matti J Kari <vmkari@cc.helsinki.fi>
Date: Mon, 21 Jul 2008 02:50:20 +0300
Subject: SELinux: trivial, remove unneeded local variable

Hello,

Remove unneeded local variable:

    struct avtab_node *newnode

Signed-off-by: Vesa-Matti Kari <vmkari@cc.helsinki.fi>
Signed-off-by: James Morris <jmorris@namei.org>
---
 security/selinux/ss/avtab.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'security')

diff --git a/security/selinux/ss/avtab.c b/security/selinux/ss/avtab.c
index a1be97f8beea..e8ae812d6af7 100644
--- a/security/selinux/ss/avtab.c
+++ b/security/selinux/ss/avtab.c
@@ -98,7 +98,7 @@ struct avtab_node *
 avtab_insert_nonunique(struct avtab *h, struct avtab_key *key, struct avtab_datum *datum)
 {
 	int hvalue;
-	struct avtab_node *prev, *cur, *newnode;
+	struct avtab_node *prev, *cur;
 	u16 specified = key->specified & ~(AVTAB_ENABLED|AVTAB_ENABLED_OLD);
 
 	if (!h || !h->htable)
@@ -122,9 +122,7 @@ avtab_insert_nonunique(struct avtab *h, struct avtab_key *key, struct avtab_datu
 		    key->target_class < cur->key.target_class)
 			break;
 	}
-	newnode = avtab_insert_node(h, hvalue, prev, cur, key, datum);
-
-	return newnode;
+	return avtab_insert_node(h, hvalue, prev, cur, key, datum);
 }
 
 struct avtab_datum *avtab_search(struct avtab *h, struct avtab_key *key)
-- 
cgit v1.2.3


From cf9481e289247fe9cf40f2e2481220d899132049 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sun, 27 Jul 2008 21:31:07 +1000
Subject: SELinux: Fix a potentially uninitialised variable in SELinux hooks

Fix a potentially uninitialised variable in SELinux hooks that's given a
pointer to the network address by selinux_parse_skb() passing a pointer back
through its argument list.  By restructuring selinux_parse_skb(), the compiler
can see that the error case need not set it as the caller will return
immediately.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 security/selinux/hooks.c | 42 ++++++++++++++++++++++++------------------
 1 file changed, 24 insertions(+), 18 deletions(-)

(limited to 'security')

diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 0ffd8814af3e..3eae30609702 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -3539,38 +3539,44 @@ out:
 #endif /* IPV6 */
 
 static int selinux_parse_skb(struct sk_buff *skb, struct avc_audit_data *ad,
-			     char **addrp, int src, u8 *proto)
+			     char **_addrp, int src, u8 *proto)
 {
-	int ret = 0;
+	char *addrp;
+	int ret;
 
 	switch (ad->u.net.family) {
 	case PF_INET:
 		ret = selinux_parse_skb_ipv4(skb, ad, proto);
-		if (ret || !addrp)
-			break;
-		*addrp = (char *)(src ? &ad->u.net.v4info.saddr :
-					&ad->u.net.v4info.daddr);
-		break;
+		if (ret)
+			goto parse_error;
+		addrp = (char *)(src ? &ad->u.net.v4info.saddr :
+				       &ad->u.net.v4info.daddr);
+		goto okay;
 
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 	case PF_INET6:
 		ret = selinux_parse_skb_ipv6(skb, ad, proto);
-		if (ret || !addrp)
-			break;
-		*addrp = (char *)(src ? &ad->u.net.v6info.saddr :
-					&ad->u.net.v6info.daddr);
-		break;
+		if (ret)
+			goto parse_error;
+		addrp = (char *)(src ? &ad->u.net.v6info.saddr :
+				       &ad->u.net.v6info.daddr);
+		goto okay;
 #endif	/* IPV6 */
 	default:
-		break;
+		addrp = NULL;
+		goto okay;
 	}
 
-	if (unlikely(ret))
-		printk(KERN_WARNING
-		       "SELinux: failure in selinux_parse_skb(),"
-		       " unable to parse packet\n");
-
+parse_error:
+	printk(KERN_WARNING
+	       "SELinux: failure in selinux_parse_skb(),"
+	       " unable to parse packet\n");
 	return ret;
+
+okay:
+	if (_addrp)
+		*_addrp = addrp;
+	return 0;
 }
 
 /**
-- 
cgit v1.2.3


From 15446235367fa4a621ff5abfa4b6ebbe25b33763 Mon Sep 17 00:00:00 2001
From: Casey Schaufler <casey@schaufler-ca.com>
Date: Wed, 30 Jul 2008 15:37:11 -0700
Subject: smack: limit privilege by label

There have been a number of requests to make the Smack LSM
enforce MAC even in the face of privilege, either capability
based or superuser based. This is not universally desired,
however, so it seems desirable to make it optional. Further,
at least one legacy OS implemented a scheme whereby only
processes running with one particular label could be exempt
from MAC. This patch supports these three cases.

If /smack/onlycap is empty (unset or null-string) privilege
is enforced in the normal way.

If /smack/onlycap contains a label only processes running with
that label may be MAC exempt.

If the label in /smack/onlycap is the star label ("*") the
semantics of the star label combine with the privilege
restrictions to prevent any violations of MAC, even in the
presence of privilege.

Again, this will be independent of the privilege scheme.

Signed-off-by: Casey Schaufler <casey@schaufler-ca.com>
Reviewed-by: James Morris <jmorris@namei.org>
---
 security/smack/smack.h        |  1 +
 security/smack/smack_access.c | 10 ++++-
 security/smack/smackfs.c      | 92 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 102 insertions(+), 1 deletion(-)

(limited to 'security')

diff --git a/security/smack/smack.h b/security/smack/smack.h
index 4a4477f5afdc..31dce559595a 100644
--- a/security/smack/smack.h
+++ b/security/smack/smack.h
@@ -178,6 +178,7 @@ u32 smack_to_secid(const char *);
 extern int smack_cipso_direct;
 extern int smack_net_nltype;
 extern char *smack_net_ambient;
+extern char *smack_onlycap;
 
 extern struct smack_known *smack_known;
 extern struct smack_known smack_known_floor;
diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c
index f6b5f6eed6dd..79ff21ed4c3b 100644
--- a/security/smack/smack_access.c
+++ b/security/smack/smack_access.c
@@ -157,7 +157,7 @@ int smk_access(char *subject_label, char *object_label, int request)
  *
  * This function checks the current subject label/object label pair
  * in the access rule list and returns 0 if the access is permitted,
- * non zero otherwise. It allows that current my have the capability
+ * non zero otherwise. It allows that current may have the capability
  * to override the rules.
  */
 int smk_curacc(char *obj_label, u32 mode)
@@ -168,6 +168,14 @@ int smk_curacc(char *obj_label, u32 mode)
 	if (rc == 0)
 		return 0;
 
+	/*
+	 * Return if a specific label has been designated as the
+	 * only one that gets privilege and current does not
+	 * have that label.
+	 */
+	if (smack_onlycap != NULL && smack_onlycap != current->security)
+		return rc;
+
 	if (capable(CAP_MAC_OVERRIDE))
 		return 0;
 
diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c
index 271a835fbbe3..e7c642458ec9 100644
--- a/security/smack/smackfs.c
+++ b/security/smack/smackfs.c
@@ -39,6 +39,7 @@ enum smk_inos {
 	SMK_DIRECT	= 6,	/* CIPSO level indicating direct label */
 	SMK_AMBIENT	= 7,	/* internet ambient label */
 	SMK_NLTYPE	= 8,	/* label scheme to use by default */
+	SMK_ONLYCAP	= 9,	/* the only "capable" label */
 };
 
 /*
@@ -68,6 +69,16 @@ int smack_net_nltype = NETLBL_NLTYPE_CIPSOV4;
  */
 int smack_cipso_direct = SMACK_CIPSO_DIRECT_DEFAULT;
 
+/*
+ * Unless a process is running with this label even
+ * having CAP_MAC_OVERRIDE isn't enough to grant
+ * privilege to violate MAC policy. If no label is
+ * designated (the NULL case) capabilities apply to
+ * everyone. It is expected that the hat (^) label
+ * will be used if any label is used.
+ */
+char *smack_onlycap;
+
 static int smk_cipso_doi_value = SMACK_CIPSO_DOI_DEFAULT;
 struct smk_list_entry *smack_list;
 
@@ -787,6 +798,85 @@ static const struct file_operations smk_ambient_ops = {
 	.write		= smk_write_ambient,
 };
 
+/**
+ * smk_read_onlycap - read() for /smack/onlycap
+ * @filp: file pointer, not actually used
+ * @buf: where to put the result
+ * @cn: maximum to send along
+ * @ppos: where to start
+ *
+ * Returns number of bytes read or error code, as appropriate
+ */
+static ssize_t smk_read_onlycap(struct file *filp, char __user *buf,
+				size_t cn, loff_t *ppos)
+{
+	char *smack = "";
+	ssize_t rc = -EINVAL;
+	int asize;
+
+	if (*ppos != 0)
+		return 0;
+
+	if (smack_onlycap != NULL)
+		smack = smack_onlycap;
+
+	asize = strlen(smack) + 1;
+
+	if (cn >= asize)
+		rc = simple_read_from_buffer(buf, cn, ppos, smack, asize);
+
+	return rc;
+}
+
+/**
+ * smk_write_onlycap - write() for /smack/onlycap
+ * @filp: file pointer, not actually used
+ * @buf: where to get the data from
+ * @count: bytes sent
+ * @ppos: where to start
+ *
+ * Returns number of bytes written or error code, as appropriate
+ */
+static ssize_t smk_write_onlycap(struct file *file, const char __user *buf,
+				 size_t count, loff_t *ppos)
+{
+	char in[SMK_LABELLEN];
+	char *sp = current->security;
+
+	if (!capable(CAP_MAC_ADMIN))
+		return -EPERM;
+
+	/*
+	 * This can be done using smk_access() but is done
+	 * explicitly for clarity. The smk_access() implementation
+	 * would use smk_access(smack_onlycap, MAY_WRITE)
+	 */
+	if (smack_onlycap != NULL && smack_onlycap != sp)
+		return -EPERM;
+
+	if (count >= SMK_LABELLEN)
+		return -EINVAL;
+
+	if (copy_from_user(in, buf, count) != 0)
+		return -EFAULT;
+
+	/*
+	 * Should the null string be passed in unset the onlycap value.
+	 * This seems like something to be careful with as usually
+	 * smk_import only expects to return NULL for errors. It
+	 * is usually the case that a nullstring or "\n" would be
+	 * bad to pass to smk_import but in fact this is useful here.
+	 */
+	smack_onlycap = smk_import(in, count);
+
+	return count;
+}
+
+static const struct file_operations smk_onlycap_ops = {
+	.read		= smk_read_onlycap,
+	.write		= smk_write_onlycap,
+};
+
 struct option_names {
 	int	o_number;
 	char	*o_name;
@@ -919,6 +1009,8 @@ static int smk_fill_super(struct super_block *sb, void *data, int silent)
 			{"ambient", &smk_ambient_ops, S_IRUGO|S_IWUSR},
 		[SMK_NLTYPE]	=
 			{"nltype", &smk_nltype_ops, S_IRUGO|S_IWUSR},
+		[SMK_ONLYCAP]	=
+			{"onlycap", &smk_onlycap_ops, S_IRUGO|S_IWUSR},
 		/* last one */ {""}
 	};
 
-- 
cgit v1.2.3


From 421fae06be9e0dac45747494756b3580643815f9 Mon Sep 17 00:00:00 2001
From: Vesa-Matti Kari <vmkari@cc.helsinki.fi>
Date: Wed, 6 Aug 2008 18:24:51 +0300
Subject: selinux: conditional expression type validation was off-by-one

expr_isvalid() in conditional.c was off-by-one and allowed
invalid expression type COND_LAST. However, it is this header file
that needs to be fixed. That way the if-statement's disjunction's
second component reads more naturally, "if expr type is greater than
the last allowed value" ( rather than using ">=" in conditional.c):

  if (expr->expr_type <= 0 || expr->expr_type > COND_LAST)

Signed-off-by: Vesa-Matti Kari <vmkari@cc.helsinki.fi>
Signed-off-by: James Morris <jmorris@namei.org>
---
 security/selinux/ss/conditional.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'security')

diff --git a/security/selinux/ss/conditional.h b/security/selinux/ss/conditional.h
index 65b9f8366e9c..53ddb013ae57 100644
--- a/security/selinux/ss/conditional.h
+++ b/security/selinux/ss/conditional.h
@@ -28,7 +28,7 @@ struct cond_expr {
 #define COND_XOR	5 /* bool ^ bool */
 #define COND_EQ		6 /* bool == bool */
 #define COND_NEQ	7 /* bool != bool */
-#define COND_LAST	8
+#define COND_LAST	COND_NEQ
 	__u32 expr_type;
 	__u32 bool;
 	struct cond_expr *next;
-- 
cgit v1.2.3


From dbc74c65b3fd841985935f676388c82d6b85c485 Mon Sep 17 00:00:00 2001
From: Vesa-Matti Kari <vmkari@cc.helsinki.fi>
Date: Thu, 7 Aug 2008 03:18:20 +0300
Subject: selinux: Unify for- and while-loop style

Replace "thing != NULL" comparisons with just "thing" to make
the code look more uniform (mixed styles were used even in the
same source file).

Signed-off-by: Vesa-Matti Kari <vmkari@cc.helsinki.fi>
Acked-by:  Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: James Morris <jmorris@namei.org>
---
 security/selinux/ss/avtab.c       |  2 +-
 security/selinux/ss/conditional.c | 16 ++++++++--------
 security/selinux/ss/ebitmap.c     |  4 ++--
 security/selinux/ss/hashtab.c     |  6 +++---
 security/selinux/ss/services.c    |  8 ++++----
 security/selinux/ss/sidtab.c      | 12 ++++++------
 6 files changed, 24 insertions(+), 24 deletions(-)

(limited to 'security')

diff --git a/security/selinux/ss/avtab.c b/security/selinux/ss/avtab.c
index e8ae812d6af7..1215b8e47dba 100644
--- a/security/selinux/ss/avtab.c
+++ b/security/selinux/ss/avtab.c
@@ -229,7 +229,7 @@ void avtab_destroy(struct avtab *h)
 
 	for (i = 0; i < h->nslot; i++) {
 		cur = h->htable[i];
-		while (cur != NULL) {
+		while (cur) {
 			temp = cur;
 			cur = cur->next;
 			kmem_cache_free(avtab_node_cachep, temp);
diff --git a/security/selinux/ss/conditional.c b/security/selinux/ss/conditional.c
index f8c850a56ed1..4a4e35cac22b 100644
--- a/security/selinux/ss/conditional.c
+++ b/security/selinux/ss/conditional.c
@@ -29,7 +29,7 @@ static int cond_evaluate_expr(struct policydb *p, struct cond_expr *expr)
 	int s[COND_EXPR_MAXDEPTH];
 	int sp = -1;
 
-	for (cur = expr; cur != NULL; cur = cur->next) {
+	for (cur = expr; cur; cur = cur->next) {
 		switch (cur->expr_type) {
 		case COND_BOOL:
 			if (sp == (COND_EXPR_MAXDEPTH - 1))
@@ -97,14 +97,14 @@ int evaluate_cond_node(struct policydb *p, struct cond_node *node)
 		if (new_state == -1)
 			printk(KERN_ERR "SELinux: expression result was undefined - disabling all rules.\n");
 		/* turn the rules on or off */
-		for (cur = node->true_list; cur != NULL; cur = cur->next) {
+		for (cur = node->true_list; cur; cur = cur->next) {
 			if (new_state <= 0)
 				cur->node->key.specified &= ~AVTAB_ENABLED;
 			else
 				cur->node->key.specified |= AVTAB_ENABLED;
 		}
 
-		for (cur = node->false_list; cur != NULL; cur = cur->next) {
+		for (cur = node->false_list; cur; cur = cur->next) {
 			/* -1 or 1 */
 			if (new_state)
 				cur->node->key.specified &= ~AVTAB_ENABLED;
@@ -128,7 +128,7 @@ int cond_policydb_init(struct policydb *p)
 static void cond_av_list_destroy(struct cond_av_list *list)
 {
 	struct cond_av_list *cur, *next;
-	for (cur = list; cur != NULL; cur = next) {
+	for (cur = list; cur; cur = next) {
 		next = cur->next;
 		/* the avtab_ptr_t node is destroy by the avtab */
 		kfree(cur);
@@ -139,7 +139,7 @@ static void cond_node_destroy(struct cond_node *node)
 {
 	struct cond_expr *cur_expr, *next_expr;
 
-	for (cur_expr = node->expr; cur_expr != NULL; cur_expr = next_expr) {
+	for (cur_expr = node->expr; cur_expr; cur_expr = next_expr) {
 		next_expr = cur_expr->next;
 		kfree(cur_expr);
 	}
@@ -155,7 +155,7 @@ static void cond_list_destroy(struct cond_node *list)
 	if (list == NULL)
 		return;
 
-	for (cur = list; cur != NULL; cur = next) {
+	for (cur = list; cur; cur = next) {
 		next = cur->next;
 		cond_node_destroy(cur);
 	}
@@ -291,7 +291,7 @@ static int cond_insertf(struct avtab *a, struct avtab_key *k, struct avtab_datum
 					goto err;
 				}
 				found = 0;
-				for (cur = other; cur != NULL; cur = cur->next) {
+				for (cur = other; cur; cur = cur->next) {
 					if (cur->node == node_ptr) {
 						found = 1;
 						break;
@@ -485,7 +485,7 @@ void cond_compute_av(struct avtab *ctab, struct avtab_key *key, struct av_decisi
 	if (!ctab || !key || !avd)
 		return;
 
-	for (node = avtab_search_node(ctab, key); node != NULL;
+	for (node = avtab_search_node(ctab, key); node;
 				node = avtab_search_node_next(node, key->specified)) {
 		if ((u16)(AVTAB_ALLOWED|AVTAB_ENABLED) ==
 		    (node->key.specified & (AVTAB_ALLOWED|AVTAB_ENABLED)))
diff --git a/security/selinux/ss/ebitmap.c b/security/selinux/ss/ebitmap.c
index ddc275490af8..68c7348d1acc 100644
--- a/security/selinux/ss/ebitmap.c
+++ b/security/selinux/ss/ebitmap.c
@@ -109,7 +109,7 @@ int ebitmap_netlbl_export(struct ebitmap *ebmap,
 	*catmap = c_iter;
 	c_iter->startbit = e_iter->startbit & ~(NETLBL_CATMAP_SIZE - 1);
 
-	while (e_iter != NULL) {
+	while (e_iter) {
 		for (i = 0; i < EBITMAP_UNIT_NUMS; i++) {
 			unsigned int delta, e_startbit, c_endbit;
 
@@ -197,7 +197,7 @@ int ebitmap_netlbl_import(struct ebitmap *ebmap,
 			}
 		}
 		c_iter = c_iter->next;
-	} while (c_iter != NULL);
+	} while (c_iter);
 	if (e_iter != NULL)
 		ebmap->highbit = e_iter->startbit + EBITMAP_SIZE;
 	else
diff --git a/security/selinux/ss/hashtab.c b/security/selinux/ss/hashtab.c
index 2e7788e13213..933e735bb185 100644
--- a/security/selinux/ss/hashtab.c
+++ b/security/selinux/ss/hashtab.c
@@ -81,7 +81,7 @@ void *hashtab_search(struct hashtab *h, const void *key)
 
 	hvalue = h->hash_value(h, key);
 	cur = h->htable[hvalue];
-	while (cur != NULL && h->keycmp(h, key, cur->key) > 0)
+	while (cur && h->keycmp(h, key, cur->key) > 0)
 		cur = cur->next;
 
 	if (cur == NULL || (h->keycmp(h, key, cur->key) != 0))
@@ -100,7 +100,7 @@ void hashtab_destroy(struct hashtab *h)
 
 	for (i = 0; i < h->size; i++) {
 		cur = h->htable[i];
-		while (cur != NULL) {
+		while (cur) {
 			temp = cur;
 			cur = cur->next;
 			kfree(temp);
@@ -127,7 +127,7 @@ int hashtab_map(struct hashtab *h,
 
 	for (i = 0; i < h->size; i++) {
 		cur = h->htable[i];
-		while (cur != NULL) {
+		while (cur) {
 			ret = apply(cur->key, cur->datum, args);
 			if (ret)
 				return ret;
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index b52f923ce680..5a0536bddc63 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -356,7 +356,7 @@ static int context_struct_compute_av(struct context *scontext,
 			avkey.source_type = i + 1;
 			avkey.target_type = j + 1;
 			for (node = avtab_search_node(&policydb.te_avtab, &avkey);
-			     node != NULL;
+			     node;
 			     node = avtab_search_node_next(node, avkey.specified)) {
 				if (node->key.specified == AVTAB_ALLOWED)
 					avd->allowed |= node->datum.data;
@@ -1037,7 +1037,7 @@ static int security_compute_sid(u32 ssid,
 	/* If no permanent rule, also check for enabled conditional rules */
 	if (!avdatum) {
 		node = avtab_search_node(&policydb.te_cond_avtab, &avkey);
-		for (; node != NULL; node = avtab_search_node_next(node, specified)) {
+		for (; node; node = avtab_search_node_next(node, specified)) {
 			if (node->key.specified & AVTAB_ENABLED) {
 				avdatum = &node->datum;
 				break;
@@ -2050,7 +2050,7 @@ int security_set_bools(int len, int *values)
 			policydb.bool_val_to_struct[i]->state = 0;
 	}
 
-	for (cur = policydb.cond_list; cur != NULL; cur = cur->next) {
+	for (cur = policydb.cond_list; cur; cur = cur->next) {
 		rc = evaluate_cond_node(&policydb, cur);
 		if (rc)
 			goto out;
@@ -2102,7 +2102,7 @@ static int security_preserve_bools(struct policydb *p)
 		if (booldatum)
 			booldatum->state = bvalues[i];
 	}
-	for (cur = p->cond_list; cur != NULL; cur = cur->next) {
+	for (cur = p->cond_list; cur; cur = cur->next) {
 		rc = evaluate_cond_node(p, cur);
 		if (rc)
 			goto out;
diff --git a/security/selinux/ss/sidtab.c b/security/selinux/ss/sidtab.c
index a81ded104129..e817989764cd 100644
--- a/security/selinux/ss/sidtab.c
+++ b/security/selinux/ss/sidtab.c
@@ -43,7 +43,7 @@ int sidtab_insert(struct sidtab *s, u32 sid, struct context *context)
 	hvalue = SIDTAB_HASH(sid);
 	prev = NULL;
 	cur = s->htable[hvalue];
-	while (cur != NULL && sid > cur->sid) {
+	while (cur && sid > cur->sid) {
 		prev = cur;
 		cur = cur->next;
 	}
@@ -92,7 +92,7 @@ static struct context *sidtab_search_core(struct sidtab *s, u32 sid, int force)
 
 	hvalue = SIDTAB_HASH(sid);
 	cur = s->htable[hvalue];
-	while (cur != NULL && sid > cur->sid)
+	while (cur && sid > cur->sid)
 		cur = cur->next;
 
 	if (force && cur && sid == cur->sid && cur->context.len)
@@ -103,7 +103,7 @@ static struct context *sidtab_search_core(struct sidtab *s, u32 sid, int force)
 		sid = SECINITSID_UNLABELED;
 		hvalue = SIDTAB_HASH(sid);
 		cur = s->htable[hvalue];
-		while (cur != NULL && sid > cur->sid)
+		while (cur && sid > cur->sid)
 			cur = cur->next;
 		if (!cur || sid != cur->sid)
 			return NULL;
@@ -136,7 +136,7 @@ int sidtab_map(struct sidtab *s,
 
 	for (i = 0; i < SIDTAB_SIZE; i++) {
 		cur = s->htable[i];
-		while (cur != NULL) {
+		while (cur) {
 			rc = apply(cur->sid, &cur->context, args);
 			if (rc)
 				goto out;
@@ -155,7 +155,7 @@ static inline u32 sidtab_search_context(struct sidtab *s,
 
 	for (i = 0; i < SIDTAB_SIZE; i++) {
 		cur = s->htable[i];
-		while (cur != NULL) {
+		while (cur) {
 			if (context_cmp(&cur->context, context))
 				return cur->sid;
 			cur = cur->next;
@@ -242,7 +242,7 @@ void sidtab_destroy(struct sidtab *s)
 
 	for (i = 0; i < SIDTAB_SIZE; i++) {
 		cur = s->htable[i];
-		while (cur != NULL) {
+		while (cur) {
 			temp = cur;
 			cur = cur->next;
 			context_destroy(&temp->context);
-- 
cgit v1.2.3


From 3f23d815c5049c9d7022226cec2242e384dd0b43 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Sun, 17 Aug 2008 21:44:22 -0700
Subject: security: add/fix security kernel-doc

Add security/inode.c functions to the kernel-api docbook.
Use '%' on constants in kernel-doc notation.
Fix several typos/spellos in security function descriptions.

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 Documentation/DocBook/kernel-api.tmpl |  1 +
 security/inode.c                      | 33 ++++++++++++++++-----------------
 security/security.c                   |  8 ++++----
 3 files changed, 21 insertions(+), 21 deletions(-)

(limited to 'security')

diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl
index b7b1482f6e04..cd0e6d5370d7 100644
--- a/Documentation/DocBook/kernel-api.tmpl
+++ b/Documentation/DocBook/kernel-api.tmpl
@@ -283,6 +283,7 @@ X!Earch/x86/kernel/mca_32.c
   <chapter id="security">
      <title>Security Framework</title>
 !Isecurity/security.c
+!Esecurity/inode.c
   </chapter>
 
   <chapter id="audit">
diff --git a/security/inode.c b/security/inode.c
index acc6cf0d7900..ca4958ebad8d 100644
--- a/security/inode.c
+++ b/security/inode.c
@@ -190,7 +190,7 @@ static int create_by_name(const char *name, mode_t mode,
  * @name: a pointer to a string containing the name of the file to create.
  * @mode: the permission that the file should have
  * @parent: a pointer to the parent dentry for this file.  This should be a
- *          directory dentry if set.  If this paramater is NULL, then the
+ *          directory dentry if set.  If this parameter is %NULL, then the
  *          file will be created in the root of the securityfs filesystem.
  * @data: a pointer to something that the caller will want to get to later
  *        on.  The inode.i_private pointer will point to this value on
@@ -199,18 +199,18 @@ static int create_by_name(const char *name, mode_t mode,
  *        this file.
  *
  * This is the basic "create a file" function for securityfs.  It allows for a
- * wide range of flexibility in createing a file, or a directory (if you
+ * wide range of flexibility in creating a file, or a directory (if you
  * want to create a directory, the securityfs_create_dir() function is
- * recommended to be used instead.)
+ * recommended to be used instead).
  *
- * This function will return a pointer to a dentry if it succeeds.  This
+ * This function returns a pointer to a dentry if it succeeds.  This
  * pointer must be passed to the securityfs_remove() function when the file is
  * to be removed (no automatic cleanup happens if your module is unloaded,
- * you are responsible here.)  If an error occurs, NULL will be returned.
+ * you are responsible here).  If an error occurs, %NULL is returned.
  *
- * If securityfs is not enabled in the kernel, the value -ENODEV will be
+ * If securityfs is not enabled in the kernel, the value %-ENODEV is
  * returned.  It is not wise to check for this value, but rather, check for
- * NULL or !NULL instead as to eliminate the need for #ifdef in the calling
+ * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
  * code.
  */
 struct dentry *securityfs_create_file(const char *name, mode_t mode,
@@ -252,19 +252,19 @@ EXPORT_SYMBOL_GPL(securityfs_create_file);
  * @name: a pointer to a string containing the name of the directory to
  *        create.
  * @parent: a pointer to the parent dentry for this file.  This should be a
- *          directory dentry if set.  If this paramater is NULL, then the
+ *          directory dentry if set.  If this parameter is %NULL, then the
  *          directory will be created in the root of the securityfs filesystem.
  *
- * This function creates a directory in securityfs with the given name.
+ * This function creates a directory in securityfs with the given @name.
  *
- * This function will return a pointer to a dentry if it succeeds.  This
+ * This function returns a pointer to a dentry if it succeeds.  This
  * pointer must be passed to the securityfs_remove() function when the file is
  * to be removed (no automatic cleanup happens if your module is unloaded,
- * you are responsible here.)  If an error occurs, NULL will be returned.
+ * you are responsible here).  If an error occurs, %NULL will be returned.
  *
- * If securityfs is not enabled in the kernel, the value -ENODEV will be
+ * If securityfs is not enabled in the kernel, the value %-ENODEV is
  * returned.  It is not wise to check for this value, but rather, check for
- * NULL or !NULL instead as to eliminate the need for #ifdef in the calling
+ * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
  * code.
  */
 struct dentry *securityfs_create_dir(const char *name, struct dentry *parent)
@@ -278,16 +278,15 @@ EXPORT_SYMBOL_GPL(securityfs_create_dir);
 /**
  * securityfs_remove - removes a file or directory from the securityfs filesystem
  *
- * @dentry: a pointer to a the dentry of the file or directory to be
- *          removed.
+ * @dentry: a pointer to a the dentry of the file or directory to be removed.
  *
  * This function removes a file or directory in securityfs that was previously
  * created with a call to another securityfs function (like
  * securityfs_create_file() or variants thereof.)
  *
  * This function is required to be called in order for the file to be
- * removed, no automatic cleanup of files will happen when a module is
- * removed, you are responsible here.
+ * removed. No automatic cleanup of files will happen when a module is
+ * removed; you are responsible here.
  */
 void securityfs_remove(struct dentry *dentry)
 {
diff --git a/security/security.c b/security/security.c
index ff7068727757..d953d251fdca 100644
--- a/security/security.c
+++ b/security/security.c
@@ -82,8 +82,8 @@ __setup("security=", choose_lsm);
  *
  * Return true if:
  *	-The passed LSM is the one chosen by user at boot time,
- *	-or user didsn't specify a specific LSM and we're the first to ask
- *	 for registeration permissoin,
+ *	-or user didn't specify a specific LSM and we're the first to ask
+ *	 for registration permission,
  *	-or the passed LSM is currently loaded.
  * Otherwise, return false.
  */
@@ -101,13 +101,13 @@ int __init security_module_enable(struct security_operations *ops)
  * register_security - registers a security framework with the kernel
  * @ops: a pointer to the struct security_options that is to be registered
  *
- * This function is to allow a security module to register itself with the
+ * This function allows a security module to register itself with the
  * kernel security subsystem.  Some rudimentary checking is done on the @ops
  * value passed to this function. You'll need to check first if your LSM
  * is allowed to register its @ops by calling security_module_enable(@ops).
  *
  * If there is already a security module registered with the kernel,
- * an error will be returned.  Otherwise 0 is returned on success.
+ * an error will be returned.  Otherwise %0 is returned on success.
  */
 int register_security(struct security_operations *ops)
 {
-- 
cgit v1.2.3


From da31894ed7b654e2e1741e7ac4ef6c15be0dd14b Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Fri, 22 Aug 2008 11:35:57 -0400
Subject: securityfs: do not depend on CONFIG_SECURITY

Add a new Kconfig option SECURITYFS which will build securityfs support
but does not require CONFIG_SECURITY.  The only current user of
securityfs does not depend on CONFIG_SECURITY and there is no reason the
full LSM needs to be built to build this fs.

Signed-off-by: Eric Paris <eparis@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 drivers/char/tpm/Kconfig |  1 +
 include/linux/security.h | 54 +++++++++++++++++++++++++++---------------------
 security/Kconfig         |  8 +++++++
 security/Makefile        |  3 ++-
 4 files changed, 41 insertions(+), 25 deletions(-)

(limited to 'security')

diff --git a/drivers/char/tpm/Kconfig b/drivers/char/tpm/Kconfig
index 3738cfa209ff..f5fc64f89c5c 100644
--- a/drivers/char/tpm/Kconfig
+++ b/drivers/char/tpm/Kconfig
@@ -6,6 +6,7 @@ menuconfig TCG_TPM
 	tristate "TPM Hardware Support"
 	depends on HAS_IOMEM
 	depends on EXPERIMENTAL
+	select SECURITYFS
 	---help---
 	  If you have a TPM security chip in your system, which
 	  implements the Trusted Computing Group's specification,
diff --git a/include/linux/security.h b/include/linux/security.h
index 80c4d002864c..f5c4a51eb42e 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1560,11 +1560,6 @@ struct security_operations {
 extern int security_init(void);
 extern int security_module_enable(struct security_operations *ops);
 extern int register_security(struct security_operations *ops);
-extern struct dentry *securityfs_create_file(const char *name, mode_t mode,
-					     struct dentry *parent, void *data,
-					     const struct file_operations *fops);
-extern struct dentry *securityfs_create_dir(const char *name, struct dentry *parent);
-extern void securityfs_remove(struct dentry *dentry);
 
 /* Security operations */
 int security_ptrace_may_access(struct task_struct *child, unsigned int mode);
@@ -2424,25 +2419,6 @@ static inline int security_netlink_recv(struct sk_buff *skb, int cap)
 	return cap_netlink_recv(skb, cap);
 }
 
-static inline struct dentry *securityfs_create_dir(const char *name,
-					struct dentry *parent)
-{
-	return ERR_PTR(-ENODEV);
-}
-
-static inline struct dentry *securityfs_create_file(const char *name,
-						mode_t mode,
-						struct dentry *parent,
-						void *data,
-						const struct file_operations *fops)
-{
-	return ERR_PTR(-ENODEV);
-}
-
-static inline void securityfs_remove(struct dentry *dentry)
-{
-}
-
 static inline int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen)
 {
 	return -EOPNOTSUPP;
@@ -2806,5 +2782,35 @@ static inline void security_audit_rule_free(void *lsmrule)
 #endif /* CONFIG_SECURITY */
 #endif /* CONFIG_AUDIT */
 
+#ifdef CONFIG_SECURITYFS
+
+extern struct dentry *securityfs_create_file(const char *name, mode_t mode,
+					     struct dentry *parent, void *data,
+					     const struct file_operations *fops);
+extern struct dentry *securityfs_create_dir(const char *name, struct dentry *parent);
+extern void securityfs_remove(struct dentry *dentry);
+
+#else /* CONFIG_SECURITYFS */
+
+static inline struct dentry *securityfs_create_dir(const char *name,
+						   struct dentry *parent)
+{
+	return ERR_PTR(-ENODEV);
+}
+
+static inline struct dentry *securityfs_create_file(const char *name,
+						    mode_t mode,
+						    struct dentry *parent,
+						    void *data,
+						    const struct file_operations *fops)
+{
+	return ERR_PTR(-ENODEV);
+}
+
+static inline void securityfs_remove(struct dentry *dentry)
+{}
+
+#endif
+
 #endif /* ! __LINUX_SECURITY_H */
 
diff --git a/security/Kconfig b/security/Kconfig
index 559293922a47..d9f47ce7e207 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -51,6 +51,14 @@ config SECURITY
 
 	  If you are unsure how to answer this question, answer N.
 
+config SECURITYFS
+	bool "Enable the securityfs filesystem"
+	help
+	  This will build the securityfs filesystem.  It is currently used by
+	  the TPM bios character driver.  It is not used by SELinux or SMACK.
+
+	  If you are unsure how to answer this question, answer N.
+
 config SECURITY_NETWORK
 	bool "Socket and Networking Security Hooks"
 	depends on SECURITY
diff --git a/security/Makefile b/security/Makefile
index f65426099aa6..c05c127fff9a 100644
--- a/security/Makefile
+++ b/security/Makefile
@@ -10,7 +10,8 @@ subdir-$(CONFIG_SECURITY_SMACK)		+= smack
 obj-y		+= commoncap.o
 
 # Object file lists
-obj-$(CONFIG_SECURITY)			+= security.o capability.o inode.o
+obj-$(CONFIG_SECURITY)			+= security.o capability.o
+obj-$(CONFIG_SECURITYFS)		+= inode.o
 # Must precede capability.o in order to stack properly.
 obj-$(CONFIG_SECURITY_SELINUX)		+= selinux/built-in.o
 obj-$(CONFIG_SECURITY_SMACK)		+= smack/built-in.o
-- 
cgit v1.2.3


From d9250dea3f89fe808a525f08888016b495240ed4 Mon Sep 17 00:00:00 2001
From: KaiGai Kohei <kaigai@ak.jp.nec.com>
Date: Thu, 28 Aug 2008 16:35:57 +0900
Subject: SELinux: add boundary support and thread context assignment

The purpose of this patch is to assign per-thread security context
under a constraint. It enables multi-threaded server application
to kick a request handler with its fair security context, and
helps some of userspace object managers to handle user's request.

When we assign a per-thread security context, it must not have wider
permissions than the original one. Because a multi-threaded process
shares a single local memory, an arbitary per-thread security context
also means another thread can easily refer violated information.

The constraint on a per-thread security context requires a new domain
has to be equal or weaker than its original one, when it tries to assign
a per-thread security context.

Bounds relationship between two types is a way to ensure a domain can
never have wider permission than its bounds. We can define it in two
explicit or implicit ways.

The first way is using new TYPEBOUNDS statement. It enables to define
a boundary of types explicitly. The other one expand the concept of
existing named based hierarchy. If we defines a type with "." separated
name like "httpd_t.php", toolchain implicitly set its bounds on "httpd_t".

This feature requires a new policy version.
The 24th version (POLICYDB_VERSION_BOUNDARY) enables to ship them into
kernel space, and the following patch enables to handle it.

Signed-off-by: KaiGai Kohei <kaigai@ak.jp.nec.com>
Acked-by:  Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: James Morris <jmorris@namei.org>
---
 security/selinux/avc.c              |   2 +-
 security/selinux/hooks.c            |  15 ++-
 security/selinux/include/avc.h      |   4 +
 security/selinux/include/security.h |  15 ++-
 security/selinux/ss/policydb.c      | 205 +++++++++++++++++++++++++++++++++---
 security/selinux/ss/policydb.h      |   5 +
 security/selinux/ss/services.c      | 172 +++++++++++++++++++++++++++++-
 7 files changed, 398 insertions(+), 20 deletions(-)

(limited to 'security')

diff --git a/security/selinux/avc.c b/security/selinux/avc.c
index 114b4b4c97b2..cb30c7e350b3 100644
--- a/security/selinux/avc.c
+++ b/security/selinux/avc.c
@@ -136,7 +136,7 @@ static inline int avc_hash(u32 ssid, u32 tsid, u16 tclass)
  * @tclass: target security class
  * @av: access vector
  */
-static void avc_dump_av(struct audit_buffer *ab, u16 tclass, u32 av)
+void avc_dump_av(struct audit_buffer *ab, u16 tclass, u32 av)
 {
 	const char **common_pts = NULL;
 	u32 common_base = 0;
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 6b5790bba8f9..89f446d86054 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -5226,8 +5226,12 @@ static int selinux_setprocattr(struct task_struct *p,
 
 		if (sid == 0)
 			return -EINVAL;
-
-		/* Only allow single threaded processes to change context */
+		/*
+		 * SELinux allows to change context in the following case only.
+		 *  - Single threaded processes.
+		 *  - Multi threaded processes intend to change its context into
+		 *    more restricted domain (defined by TYPEBOUNDS statement).
+		 */
 		if (atomic_read(&p->mm->mm_users) != 1) {
 			struct task_struct *g, *t;
 			struct mm_struct *mm = p->mm;
@@ -5235,11 +5239,16 @@ static int selinux_setprocattr(struct task_struct *p,
 			do_each_thread(g, t) {
 				if (t->mm == mm && t != p) {
 					read_unlock(&tasklist_lock);
-					return -EPERM;
+					error = security_bounded_transition(tsec->sid, sid);
+					if (!error)
+						goto boundary_ok;
+
+					return error;
 				}
 			} while_each_thread(g, t);
 			read_unlock(&tasklist_lock);
 		}
+boundary_ok:
 
 		/* Check permissions for the transition. */
 		error = avc_has_perm(tsec->sid, sid, SECCLASS_PROCESS,
diff --git a/security/selinux/include/avc.h b/security/selinux/include/avc.h
index 7b9769f5e775..d12ff1a9c0aa 100644
--- a/security/selinux/include/avc.h
+++ b/security/selinux/include/avc.h
@@ -12,6 +12,7 @@
 #include <linux/kdev_t.h>
 #include <linux/spinlock.h>
 #include <linux/init.h>
+#include <linux/audit.h>
 #include <linux/in6.h>
 #include <linux/path.h>
 #include <asm/system.h>
@@ -126,6 +127,9 @@ int avc_add_callback(int (*callback)(u32 event, u32 ssid, u32 tsid,
 		     u32 events, u32 ssid, u32 tsid,
 		     u16 tclass, u32 perms);
 
+/* Shows permission in human readable form */
+void avc_dump_av(struct audit_buffer *ab, u16 tclass, u32 av);
+
 /* Exported to selinuxfs */
 int avc_get_hash_stats(char *page);
 extern unsigned int avc_cache_threshold;
diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h
index 7c543003d653..72447370bc95 100644
--- a/security/selinux/include/security.h
+++ b/security/selinux/include/security.h
@@ -27,13 +27,14 @@
 #define POLICYDB_VERSION_RANGETRANS	21
 #define POLICYDB_VERSION_POLCAP		22
 #define POLICYDB_VERSION_PERMISSIVE	23
+#define POLICYDB_VERSION_BOUNDARY	24
 
 /* Range of policy versions we understand*/
 #define POLICYDB_VERSION_MIN   POLICYDB_VERSION_BASE
 #ifdef CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX
 #define POLICYDB_VERSION_MAX	CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX_VALUE
 #else
-#define POLICYDB_VERSION_MAX	POLICYDB_VERSION_PERMISSIVE
+#define POLICYDB_VERSION_MAX	POLICYDB_VERSION_BOUNDARY
 #endif
 
 #define CONTEXT_MNT	0x01
@@ -62,6 +63,16 @@ enum {
 extern int selinux_policycap_netpeer;
 extern int selinux_policycap_openperm;
 
+/*
+ * type_datum properties
+ * available at the kernel policy version >= POLICYDB_VERSION_BOUNDARY
+ */
+#define TYPEDATUM_PROPERTY_PRIMARY	0x0001
+#define TYPEDATUM_PROPERTY_ATTRIBUTE	0x0002
+
+/* limitation of boundary depth  */
+#define POLICYDB_BOUNDS_MAXDEPTH	4
+
 int security_load_policy(void *data, size_t len);
 
 int security_policycap_supported(unsigned int req_cap);
@@ -117,6 +128,8 @@ int security_node_sid(u16 domain, void *addr, u32 addrlen,
 int security_validate_transition(u32 oldsid, u32 newsid, u32 tasksid,
 				 u16 tclass);
 
+int security_bounded_transition(u32 oldsid, u32 newsid);
+
 int security_sid_mls_copy(u32 sid, u32 mls_sid, u32 *new_sid);
 
 int security_net_peersid_resolve(u32 nlbl_sid, u32 nlbl_type,
diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c
index 26646305dc0e..72e4a54973aa 100644
--- a/security/selinux/ss/policydb.c
+++ b/security/selinux/ss/policydb.c
@@ -30,6 +30,7 @@
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/errno.h>
+#include <linux/audit.h>
 #include "security.h"
 
 #include "policydb.h"
@@ -116,7 +117,12 @@ static struct policydb_compat_info policydb_compat[] = {
 		.version	= POLICYDB_VERSION_PERMISSIVE,
 		.sym_num	= SYM_NUM,
 		.ocon_num	= OCON_NUM,
-	}
+	},
+	{
+		.version	= POLICYDB_VERSION_BOUNDARY,
+		.sym_num	= SYM_NUM,
+		.ocon_num	= OCON_NUM,
+	},
 };
 
 static struct policydb_compat_info *policydb_lookup_compat(int version)
@@ -254,7 +260,9 @@ static int role_index(void *key, void *datum, void *datap)
 
 	role = datum;
 	p = datap;
-	if (!role->value || role->value > p->p_roles.nprim)
+	if (!role->value
+	    || role->value > p->p_roles.nprim
+	    || role->bounds > p->p_roles.nprim)
 		return -EINVAL;
 	p->p_role_val_to_name[role->value - 1] = key;
 	p->role_val_to_struct[role->value - 1] = role;
@@ -270,9 +278,12 @@ static int type_index(void *key, void *datum, void *datap)
 	p = datap;
 
 	if (typdatum->primary) {
-		if (!typdatum->value || typdatum->value > p->p_types.nprim)
+		if (!typdatum->value
+		    || typdatum->value > p->p_types.nprim
+		    || typdatum->bounds > p->p_types.nprim)
 			return -EINVAL;
 		p->p_type_val_to_name[typdatum->value - 1] = key;
+		p->type_val_to_struct[typdatum->value - 1] = typdatum;
 	}
 
 	return 0;
@@ -285,7 +296,9 @@ static int user_index(void *key, void *datum, void *datap)
 
 	usrdatum = datum;
 	p = datap;
-	if (!usrdatum->value || usrdatum->value > p->p_users.nprim)
+	if (!usrdatum->value
+	    || usrdatum->value > p->p_users.nprim
+	    || usrdatum->bounds > p->p_users.nprim)
 		return -EINVAL;
 	p->p_user_val_to_name[usrdatum->value - 1] = key;
 	p->user_val_to_struct[usrdatum->value - 1] = usrdatum;
@@ -438,6 +451,14 @@ static int policydb_index_others(struct policydb *p)
 		goto out;
 	}
 
+	p->type_val_to_struct =
+		kmalloc(p->p_types.nprim * sizeof(*(p->type_val_to_struct)),
+			GFP_KERNEL);
+	if (!p->type_val_to_struct) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
 	if (cond_init_bool_indexes(p)) {
 		rc = -ENOMEM;
 		goto out;
@@ -625,6 +646,7 @@ void policydb_destroy(struct policydb *p)
 	kfree(p->class_val_to_struct);
 	kfree(p->role_val_to_struct);
 	kfree(p->user_val_to_struct);
+	kfree(p->type_val_to_struct);
 
 	avtab_destroy(&p->te_avtab);
 
@@ -1176,8 +1198,8 @@ static int role_read(struct policydb *p, struct hashtab *h, void *fp)
 {
 	char *key = NULL;
 	struct role_datum *role;
-	int rc;
-	__le32 buf[2];
+	int rc, to_read = 2;
+	__le32 buf[3];
 	u32 len;
 
 	role = kzalloc(sizeof(*role), GFP_KERNEL);
@@ -1186,12 +1208,17 @@ static int role_read(struct policydb *p, struct hashtab *h, void *fp)
 		goto out;
 	}
 
-	rc = next_entry(buf, fp, sizeof buf);
+	if (p->policyvers >= POLICYDB_VERSION_BOUNDARY)
+		to_read = 3;
+
+	rc = next_entry(buf, fp, sizeof(buf[0]) * to_read);
 	if (rc < 0)
 		goto bad;
 
 	len = le32_to_cpu(buf[0]);
 	role->value = le32_to_cpu(buf[1]);
+	if (p->policyvers >= POLICYDB_VERSION_BOUNDARY)
+		role->bounds = le32_to_cpu(buf[2]);
 
 	key = kmalloc(len + 1, GFP_KERNEL);
 	if (!key) {
@@ -1236,8 +1263,8 @@ static int type_read(struct policydb *p, struct hashtab *h, void *fp)
 {
 	char *key = NULL;
 	struct type_datum *typdatum;
-	int rc;
-	__le32 buf[3];
+	int rc, to_read = 3;
+	__le32 buf[4];
 	u32 len;
 
 	typdatum = kzalloc(sizeof(*typdatum), GFP_KERNEL);
@@ -1246,13 +1273,27 @@ static int type_read(struct policydb *p, struct hashtab *h, void *fp)
 		return rc;
 	}
 
-	rc = next_entry(buf, fp, sizeof buf);
+	if (p->policyvers >= POLICYDB_VERSION_BOUNDARY)
+		to_read = 4;
+
+	rc = next_entry(buf, fp, sizeof(buf[0]) * to_read);
 	if (rc < 0)
 		goto bad;
 
 	len = le32_to_cpu(buf[0]);
 	typdatum->value = le32_to_cpu(buf[1]);
-	typdatum->primary = le32_to_cpu(buf[2]);
+	if (p->policyvers >= POLICYDB_VERSION_BOUNDARY) {
+		u32 prop = le32_to_cpu(buf[2]);
+
+		if (prop & TYPEDATUM_PROPERTY_PRIMARY)
+			typdatum->primary = 1;
+		if (prop & TYPEDATUM_PROPERTY_ATTRIBUTE)
+			typdatum->attribute = 1;
+
+		typdatum->bounds = le32_to_cpu(buf[3]);
+	} else {
+		typdatum->primary = le32_to_cpu(buf[2]);
+	}
 
 	key = kmalloc(len + 1, GFP_KERNEL);
 	if (!key) {
@@ -1309,8 +1350,8 @@ static int user_read(struct policydb *p, struct hashtab *h, void *fp)
 {
 	char *key = NULL;
 	struct user_datum *usrdatum;
-	int rc;
-	__le32 buf[2];
+	int rc, to_read = 2;
+	__le32 buf[3];
 	u32 len;
 
 	usrdatum = kzalloc(sizeof(*usrdatum), GFP_KERNEL);
@@ -1319,12 +1360,17 @@ static int user_read(struct policydb *p, struct hashtab *h, void *fp)
 		goto out;
 	}
 
-	rc = next_entry(buf, fp, sizeof buf);
+	if (p->policyvers >= POLICYDB_VERSION_BOUNDARY)
+		to_read = 3;
+
+	rc = next_entry(buf, fp, sizeof(buf[0]) * to_read);
 	if (rc < 0)
 		goto bad;
 
 	len = le32_to_cpu(buf[0]);
 	usrdatum->value = le32_to_cpu(buf[1]);
+	if (p->policyvers >= POLICYDB_VERSION_BOUNDARY)
+		usrdatum->bounds = le32_to_cpu(buf[2]);
 
 	key = kmalloc(len + 1, GFP_KERNEL);
 	if (!key) {
@@ -1465,6 +1511,133 @@ static int (*read_f[SYM_NUM]) (struct policydb *p, struct hashtab *h, void *fp)
 	cat_read,
 };
 
+static int user_bounds_sanity_check(void *key, void *datum, void *datap)
+{
+	struct user_datum *upper, *user;
+	struct policydb *p = datap;
+	int depth = 0;
+
+	upper = user = datum;
+	while (upper->bounds) {
+		struct ebitmap_node *node;
+		unsigned long bit;
+
+		if (++depth == POLICYDB_BOUNDS_MAXDEPTH) {
+			printk(KERN_ERR "SELinux: user %s: "
+			       "too deep or looped boundary",
+			       (char *) key);
+			return -EINVAL;
+		}
+
+		upper = p->user_val_to_struct[upper->bounds - 1];
+		ebitmap_for_each_positive_bit(&user->roles, node, bit) {
+			if (ebitmap_get_bit(&upper->roles, bit))
+				continue;
+
+			printk(KERN_ERR
+			       "SELinux: boundary violated policy: "
+			       "user=%s role=%s bounds=%s\n",
+			       p->p_user_val_to_name[user->value - 1],
+			       p->p_role_val_to_name[bit],
+			       p->p_user_val_to_name[upper->value - 1]);
+
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static int role_bounds_sanity_check(void *key, void *datum, void *datap)
+{
+	struct role_datum *upper, *role;
+	struct policydb *p = datap;
+	int depth = 0;
+
+	upper = role = datum;
+	while (upper->bounds) {
+		struct ebitmap_node *node;
+		unsigned long bit;
+
+		if (++depth == POLICYDB_BOUNDS_MAXDEPTH) {
+			printk(KERN_ERR "SELinux: role %s: "
+			       "too deep or looped bounds\n",
+			       (char *) key);
+			return -EINVAL;
+		}
+
+		upper = p->role_val_to_struct[upper->bounds - 1];
+		ebitmap_for_each_positive_bit(&role->types, node, bit) {
+			if (ebitmap_get_bit(&upper->types, bit))
+				continue;
+
+			printk(KERN_ERR
+			       "SELinux: boundary violated policy: "
+			       "role=%s type=%s bounds=%s\n",
+			       p->p_role_val_to_name[role->value - 1],
+			       p->p_type_val_to_name[bit],
+			       p->p_role_val_to_name[upper->value - 1]);
+
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static int type_bounds_sanity_check(void *key, void *datum, void *datap)
+{
+	struct type_datum *upper, *type;
+	struct policydb *p = datap;
+	int depth = 0;
+
+	upper = type = datum;
+	while (upper->bounds) {
+		if (++depth == POLICYDB_BOUNDS_MAXDEPTH) {
+			printk(KERN_ERR "SELinux: type %s: "
+			       "too deep or looped boundary\n",
+			       (char *) key);
+			return -EINVAL;
+		}
+
+		upper = p->type_val_to_struct[upper->bounds - 1];
+		if (upper->attribute) {
+			printk(KERN_ERR "SELinux: type %s: "
+			       "bounded by attribute %s",
+			       (char *) key,
+			       p->p_type_val_to_name[upper->value - 1]);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static int policydb_bounds_sanity_check(struct policydb *p)
+{
+	int rc;
+
+	if (p->policyvers < POLICYDB_VERSION_BOUNDARY)
+		return 0;
+
+	rc = hashtab_map(p->p_users.table,
+			 user_bounds_sanity_check, p);
+	if (rc)
+		return rc;
+
+	rc = hashtab_map(p->p_roles.table,
+			 role_bounds_sanity_check, p);
+	if (rc)
+		return rc;
+
+	rc = hashtab_map(p->p_types.table,
+			 type_bounds_sanity_check, p);
+	if (rc)
+		return rc;
+
+	return 0;
+}
+
 extern int ss_initialized;
 
 /*
@@ -1961,6 +2134,10 @@ int policydb_read(struct policydb *p, void *fp)
 				goto bad;
 	}
 
+	rc = policydb_bounds_sanity_check(p);
+	if (rc)
+		goto bad;
+
 	rc = 0;
 out:
 	return rc;
diff --git a/security/selinux/ss/policydb.h b/security/selinux/ss/policydb.h
index 4253370fda6a..55152d498b53 100644
--- a/security/selinux/ss/policydb.h
+++ b/security/selinux/ss/policydb.h
@@ -61,6 +61,7 @@ struct class_datum {
 /* Role attributes */
 struct role_datum {
 	u32 value;			/* internal role value */
+	u32 bounds;			/* boundary of role */
 	struct ebitmap dominates;	/* set of roles dominated by this role */
 	struct ebitmap types;		/* set of authorized types for role */
 };
@@ -81,12 +82,15 @@ struct role_allow {
 /* Type attributes */
 struct type_datum {
 	u32 value;		/* internal type value */
+	u32 bounds;		/* boundary of type */
 	unsigned char primary;	/* primary name? */
+	unsigned char attribute;/* attribute ?*/
 };
 
 /* User attributes */
 struct user_datum {
 	u32 value;			/* internal user value */
+	u32 bounds;			/* bounds of user */
 	struct ebitmap roles;		/* set of authorized roles for user */
 	struct mls_range range;		/* MLS range (min - max) for user */
 	struct mls_level dfltlevel;	/* default login MLS level for user */
@@ -209,6 +213,7 @@ struct policydb {
 	struct class_datum **class_val_to_struct;
 	struct role_datum **role_val_to_struct;
 	struct user_datum **user_val_to_struct;
+	struct type_datum **type_val_to_struct;
 
 	/* type enforcement access vectors and transitions */
 	struct avtab te_avtab;
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index 5a0536bddc63..4f233d9960e7 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -88,6 +88,11 @@ static u32 latest_granting;
 static int context_struct_to_string(struct context *context, char **scontext,
 				    u32 *scontext_len);
 
+static int context_struct_compute_av(struct context *scontext,
+				     struct context *tcontext,
+				     u16 tclass,
+				     u32 requested,
+				     struct av_decision *avd);
 /*
  * Return the boolean value of a constraint expression
  * when it is applied to the specified source and target
@@ -273,6 +278,100 @@ mls_ops:
 	return s[0];
 }
 
+/*
+ * security_boundary_permission - drops violated permissions
+ * on boundary constraint.
+ */
+static void type_attribute_bounds_av(struct context *scontext,
+				     struct context *tcontext,
+				     u16 tclass,
+				     u32 requested,
+				     struct av_decision *avd)
+{
+	struct context lo_scontext;
+	struct context lo_tcontext;
+	struct av_decision lo_avd;
+	struct type_datum *source
+		= policydb.type_val_to_struct[scontext->type - 1];
+	struct type_datum *target
+		= policydb.type_val_to_struct[tcontext->type - 1];
+	u32 masked = 0;
+
+	if (source->bounds) {
+		memset(&lo_avd, 0, sizeof(lo_avd));
+
+		memcpy(&lo_scontext, scontext, sizeof(lo_scontext));
+		lo_scontext.type = source->bounds;
+
+		context_struct_compute_av(&lo_scontext,
+					  tcontext,
+					  tclass,
+					  requested,
+					  &lo_avd);
+		if ((lo_avd.allowed & avd->allowed) == avd->allowed)
+			return;		/* no masked permission */
+		masked = ~lo_avd.allowed & avd->allowed;
+	}
+
+	if (target->bounds) {
+		memset(&lo_avd, 0, sizeof(lo_avd));
+
+		memcpy(&lo_tcontext, tcontext, sizeof(lo_tcontext));
+		lo_tcontext.type = target->bounds;
+
+		context_struct_compute_av(scontext,
+					  &lo_tcontext,
+					  tclass,
+					  requested,
+					  &lo_avd);
+		if ((lo_avd.allowed & avd->allowed) == avd->allowed)
+			return;		/* no masked permission */
+		masked = ~lo_avd.allowed & avd->allowed;
+	}
+
+	if (source->bounds && target->bounds) {
+		memset(&lo_avd, 0, sizeof(lo_avd));
+		/*
+		 * lo_scontext and lo_tcontext are already
+		 * set up.
+		 */
+
+		context_struct_compute_av(&lo_scontext,
+					  &lo_tcontext,
+					  tclass,
+					  requested,
+					  &lo_avd);
+		if ((lo_avd.allowed & avd->allowed) == avd->allowed)
+			return;		/* no masked permission */
+		masked = ~lo_avd.allowed & avd->allowed;
+	}
+
+	if (masked) {
+		struct audit_buffer *ab;
+		char *stype_name
+			= policydb.p_type_val_to_name[source->value - 1];
+		char *ttype_name
+			= policydb.p_type_val_to_name[target->value - 1];
+		char *tclass_name
+			= policydb.p_class_val_to_name[tclass - 1];
+
+		/* mask violated permissions */
+		avd->allowed &= ~masked;
+
+		/* notice to userspace via audit message */
+		ab = audit_log_start(current->audit_context,
+				     GFP_ATOMIC, AUDIT_SELINUX_ERR);
+		if (!ab)
+			return;
+
+		audit_log_format(ab, "av boundary violation: "
+				 "source=%s target=%s tclass=%s",
+				 stype_name, ttype_name, tclass_name);
+		avc_dump_av(ab, tclass, masked);
+		audit_log_end(ab);
+	}
+}
+
 /*
  * Compute access vectors based on a context structure pair for
  * the permissions in a particular class.
@@ -404,6 +503,14 @@ static int context_struct_compute_av(struct context *scontext,
 							PROCESS__DYNTRANSITION);
 	}
 
+	/*
+	 * If the given source and target types have boundary
+	 * constraint, lazy checks have to mask any violated
+	 * permission and notice it to userspace via audit.
+	 */
+	type_attribute_bounds_av(scontext, tcontext,
+				 tclass, requested, avd);
+
 	return 0;
 
 inval_class:
@@ -549,6 +656,69 @@ out:
 	return rc;
 }
 
+/*
+ * security_bounded_transition - check whether the given
+ * transition is directed to bounded, or not.
+ * It returns 0, if @newsid is bounded by @oldsid.
+ * Otherwise, it returns error code.
+ *
+ * @oldsid : current security identifier
+ * @newsid : destinated security identifier
+ */
+int security_bounded_transition(u32 old_sid, u32 new_sid)
+{
+	struct context *old_context, *new_context;
+	struct type_datum *type;
+	int index;
+	int rc = -EINVAL;
+
+	read_lock(&policy_rwlock);
+
+	old_context = sidtab_search(&sidtab, old_sid);
+	if (!old_context) {
+		printk(KERN_ERR "SELinux: %s: unrecognized SID %u\n",
+		       __func__, old_sid);
+		goto out;
+	}
+
+	new_context = sidtab_search(&sidtab, new_sid);
+	if (!new_context) {
+		printk(KERN_ERR "SELinux: %s: unrecognized SID %u\n",
+		       __func__, new_sid);
+		goto out;
+	}
+
+	/* type/domain unchaned */
+	if (old_context->type == new_context->type) {
+		rc = 0;
+		goto out;
+	}
+
+	index = new_context->type;
+	while (true) {
+		type = policydb.type_val_to_struct[index - 1];
+		BUG_ON(!type);
+
+		/* not bounded anymore */
+		if (!type->bounds) {
+			rc = -EPERM;
+			break;
+		}
+
+		/* @newsid is bounded by @oldsid */
+		if (type->bounds == old_context->type) {
+			rc = 0;
+			break;
+		}
+		index = type->bounds;
+	}
+out:
+	read_unlock(&policy_rwlock);
+
+	return rc;
+}
+
+
 /**
  * security_compute_av - Compute access vector decisions.
  * @ssid: source security identifier
@@ -794,7 +964,7 @@ static int string_to_context_struct(struct policydb *pol,
 	*p++ = 0;
 
 	typdatum = hashtab_search(pol->p_types.table, scontextp);
-	if (!typdatum)
+	if (!typdatum || typdatum->attribute)
 		goto out;
 
 	ctx->type = typdatum->value;
-- 
cgit v1.2.3


From 36fd71d293898a59b14e49da1f6e81c1a58f2035 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Tue, 2 Sep 2008 14:35:52 -0700
Subject: devcgroup: fix race against rmdir()

During the use of a dev_cgroup, we should guarantee the corresponding
cgroup won't be deleted (i.e.  via rmdir).  This can be done through
css_get(&dev_cgroup->css), but here we can just get and use the dev_cgroup
under rcu_read_lock.

And also remove checking NULL dev_cgroup, it won't be NULL since a task
always belongs to a cgroup.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Cc: Paul Menage <menage@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 security/device_cgroup.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'security')

diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index 7bd296cca041..46f23971f7e4 100644
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -508,12 +508,11 @@ int devcgroup_inode_permission(struct inode *inode, int mask)
 		return 0;
 	if (!S_ISBLK(inode->i_mode) && !S_ISCHR(inode->i_mode))
 		return 0;
-	dev_cgroup = css_to_devcgroup(task_subsys_state(current,
-				devices_subsys_id));
-	if (!dev_cgroup)
-		return 0;
 
 	rcu_read_lock();
+
+	dev_cgroup = task_devcgroup(current);
+
 	list_for_each_entry_rcu(wh, &dev_cgroup->whitelist, list) {
 		if (wh->type & DEV_ALL)
 			goto acc_check;
@@ -533,6 +532,7 @@ acc_check:
 		rcu_read_unlock();
 		return 0;
 	}
+
 	rcu_read_unlock();
 
 	return -EPERM;
@@ -543,12 +543,10 @@ int devcgroup_inode_mknod(int mode, dev_t dev)
 	struct dev_cgroup *dev_cgroup;
 	struct dev_whitelist_item *wh;
 
-	dev_cgroup = css_to_devcgroup(task_subsys_state(current,
-				devices_subsys_id));
-	if (!dev_cgroup)
-		return 0;
-
 	rcu_read_lock();
+
+	dev_cgroup = task_devcgroup(current);
+
 	list_for_each_entry(wh, &dev_cgroup->whitelist, list) {
 		if (wh->type & DEV_ALL)
 			goto acc_check;
@@ -566,6 +564,8 @@ acc_check:
 		rcu_read_unlock();
 		return 0;
 	}
+
 	rcu_read_unlock();
+
 	return -EPERM;
 }
-- 
cgit v1.2.3


From 8e531af90f3940615623dc0aa6c94866a6773601 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 3 Sep 2008 11:49:47 -0400
Subject: SELinux: memory leak in security_context_to_sid_core

Fix a bug and a philosophical decision about who handles errors.

security_context_to_sid_core() was leaking a context in the common case.
This was causing problems on fedora systems which recently have started
making extensive use of this function.

In discussion it was decided that if string_to_context_struct() had an
error it was its own responsibility to clean up any mess it created
along the way.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by:  Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: James Morris <jmorris@namei.org>
---
 security/selinux/ss/services.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'security')

diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index b52f923ce680..d11a8154500f 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -811,11 +811,12 @@ static int string_to_context_struct(struct policydb *pol,
 	/* Check the validity of the new context. */
 	if (!policydb_context_isvalid(pol, ctx)) {
 		rc = -EINVAL;
-		context_destroy(ctx);
 		goto out;
 	}
 	rc = 0;
 out:
+	if (rc)
+		context_destroy(ctx);
 	return rc;
 }
 
@@ -868,8 +869,7 @@ static int security_context_to_sid_core(const char *scontext, u32 scontext_len,
 	} else if (rc)
 		goto out;
 	rc = sidtab_context_to_sid(&sidtab, &context, sid);
-	if (rc)
-		context_destroy(&context);
+	context_destroy(&context);
 out:
 	read_unlock(&policy_rwlock);
 	kfree(scontext2);
-- 
cgit v1.2.3


From f058925b201357fba48d56cc9c1719ae274b2022 Mon Sep 17 00:00:00 2001
From: Stephen Smalley <sds@tycho.nsa.gov>
Date: Thu, 11 Sep 2008 09:20:26 -0400
Subject: Update selinux info in MAINTAINERS and Kconfig help text

Update the SELinux entry in MAINTAINERS and drop the obsolete information
from the selinux Kconfig help text.

Signed-off-by:  Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: James Morris <jmorris@namei.org>
---
 MAINTAINERS              | 3 ++-
 security/selinux/Kconfig | 3 ---
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'security')

diff --git a/MAINTAINERS b/MAINTAINERS
index 28c69aaefcd9..5abb23e09fdc 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3644,7 +3644,8 @@ P:	Eric Paris
 M:	eparis@parisplace.org
 L:	linux-kernel@vger.kernel.org (kernel issues)
 L: 	selinux@tycho.nsa.gov (subscribers-only, general discussion)
-W:	http://www.nsa.gov/selinux
+W:	http://selinuxproject.org
+T:	git kernel.org:pub/scm/linux/kernel/git/jmorris/security-testing-2.6.git
 S:	Supported
 
 SENSABLE PHANTOM
diff --git a/security/selinux/Kconfig b/security/selinux/Kconfig
index a436d1cfa88b..26301dd651d3 100644
--- a/security/selinux/Kconfig
+++ b/security/selinux/Kconfig
@@ -6,9 +6,6 @@ config SECURITY_SELINUX
 	help
 	  This selects NSA Security-Enhanced Linux (SELinux).
 	  You will also need a policy configuration and a labeled filesystem.
-	  You can obtain the policy compiler (checkpolicy), the utility for
-	  labeling filesystems (setfiles), and an example policy configuration
-	  from <http://www.nsa.gov/selinux/>.
 	  If you are unsure how to answer this question, answer N.
 
 config SECURITY_SELINUX_BOOTPARAM
-- 
cgit v1.2.3


From f06febc96ba8e0af80bcc3eaec0a109e88275fac Mon Sep 17 00:00:00 2001
From: Frank Mayhar <fmayhar@google.com>
Date: Fri, 12 Sep 2008 09:54:39 -0700
Subject: timers: fix itimer/many thread hang

Overview

This patch reworks the handling of POSIX CPU timers, including the
ITIMER_PROF, ITIMER_VIRT timers and rlimit handling.  It was put together
with the help of Roland McGrath, the owner and original writer of this code.

The problem we ran into, and the reason for this rework, has to do with using
a profiling timer in a process with a large number of threads.  It appears
that the performance of the old implementation of run_posix_cpu_timers() was
at least O(n*3) (where "n" is the number of threads in a process) or worse.
Everything is fine with an increasing number of threads until the time taken
for that routine to run becomes the same as or greater than the tick time, at
which point things degrade rather quickly.

This patch fixes bug 9906, "Weird hang with NPTL and SIGPROF."

Code Changes

This rework corrects the implementation of run_posix_cpu_timers() to make it
run in constant time for a particular machine.  (Performance may vary between
one machine and another depending upon whether the kernel is built as single-
or multiprocessor and, in the latter case, depending upon the number of
running processors.)  To do this, at each tick we now update fields in
signal_struct as well as task_struct.  The run_posix_cpu_timers() function
uses those fields to make its decisions.

We define a new structure, "task_cputime," to contain user, system and
scheduler times and use these in appropriate places:

struct task_cputime {
	cputime_t utime;
	cputime_t stime;
	unsigned long long sum_exec_runtime;
};

This is included in the structure "thread_group_cputime," which is a new
substructure of signal_struct and which varies for uniprocessor versus
multiprocessor kernels.  For uniprocessor kernels, it uses "task_cputime" as
a simple substructure, while for multiprocessor kernels it is a pointer:

struct thread_group_cputime {
	struct task_cputime totals;
};

struct thread_group_cputime {
	struct task_cputime *totals;
};

We also add a new task_cputime substructure directly to signal_struct, to
cache the earliest expiration of process-wide timers, and task_cputime also
replaces the it_*_expires fields of task_struct (used for earliest expiration
of thread timers).  The "thread_group_cputime" structure contains process-wide
timers that are updated via account_user_time() and friends.  In the non-SMP
case the structure is a simple aggregator; unfortunately in the SMP case that
simplicity was not achievable due to cache-line contention between CPUs (in
one measured case performance was actually _worse_ on a 16-cpu system than
the same test on a 4-cpu system, due to this contention).  For SMP, the
thread_group_cputime counters are maintained as a per-cpu structure allocated
using alloc_percpu().  The timer functions update only the timer field in
the structure corresponding to the running CPU, obtained using per_cpu_ptr().

We define a set of inline functions in sched.h that we use to maintain the
thread_group_cputime structure and hide the differences between UP and SMP
implementations from the rest of the kernel.  The thread_group_cputime_init()
function initializes the thread_group_cputime structure for the given task.
The thread_group_cputime_alloc() is a no-op for UP; for SMP it calls the
out-of-line function thread_group_cputime_alloc_smp() to allocate and fill
in the per-cpu structures and fields.  The thread_group_cputime_free()
function, also a no-op for UP, in SMP frees the per-cpu structures.  The
thread_group_cputime_clone_thread() function (also a UP no-op) for SMP calls
thread_group_cputime_alloc() if the per-cpu structures haven't yet been
allocated.  The thread_group_cputime() function fills the task_cputime
structure it is passed with the contents of the thread_group_cputime fields;
in UP it's that simple but in SMP it must also safely check that tsk->signal
is non-NULL (if it is it just uses the appropriate fields of task_struct) and,
if so, sums the per-cpu values for each online CPU.  Finally, the three
functions account_group_user_time(), account_group_system_time() and
account_group_exec_runtime() are used by timer functions to update the
respective fields of the thread_group_cputime structure.

Non-SMP operation is trivial and will not be mentioned further.

The per-cpu structure is always allocated when a task creates its first new
thread, via a call to thread_group_cputime_clone_thread() from copy_signal().
It is freed at process exit via a call to thread_group_cputime_free() from
cleanup_signal().

All functions that formerly summed utime/stime/sum_sched_runtime values from
from all threads in the thread group now use thread_group_cputime() to
snapshot the values in the thread_group_cputime structure or the values in
the task structure itself if the per-cpu structure hasn't been allocated.

Finally, the code in kernel/posix-cpu-timers.c has changed quite a bit.
The run_posix_cpu_timers() function has been split into a fast path and a
slow path; the former safely checks whether there are any expired thread
timers and, if not, just returns, while the slow path does the heavy lifting.
With the dedicated thread group fields, timers are no longer "rebalanced" and
the process_timer_rebalance() function and related code has gone away.  All
summing loops are gone and all code that used them now uses the
thread_group_cputime() inline.  When process-wide timers are set, the new
task_cputime structure in signal_struct is used to cache the earliest
expiration; this is checked in the fast path.

Performance

The fix appears not to add significant overhead to existing operations.  It
generally performs the same as the current code except in two cases, one in
which it performs slightly worse (Case 5 below) and one in which it performs
very significantly better (Case 2 below).  Overall it's a wash except in those
two cases.

I've since done somewhat more involved testing on a dual-core Opteron system.

Case 1: With no itimer running, for a test with 100,000 threads, the fixed
	kernel took 1428.5 seconds, 513 seconds more than the unfixed system,
	all of which was spent in the system.  There were twice as many
	voluntary context switches with the fix as without it.

Case 2: With an itimer running at .01 second ticks and 4000 threads (the most
	an unmodified kernel can handle), the fixed kernel ran the test in
	eight percent of the time (5.8 seconds as opposed to 70 seconds) and
	had better tick accuracy (.012 seconds per tick as opposed to .023
	seconds per tick).

Case 3: A 4000-thread test with an initial timer tick of .01 second and an
	interval of 10,000 seconds (i.e. a timer that ticks only once) had
	very nearly the same performance in both cases:  6.3 seconds elapsed
	for the fixed kernel versus 5.5 seconds for the unfixed kernel.

With fewer threads (eight in these tests), the Case 1 test ran in essentially
the same time on both the modified and unmodified kernels (5.2 seconds versus
5.8 seconds).  The Case 2 test ran in about the same time as well, 5.9 seconds
versus 5.4 seconds but again with much better tick accuracy, .013 seconds per
tick versus .025 seconds per tick for the unmodified kernel.

Since the fix affected the rlimit code, I also tested soft and hard CPU limits.

Case 4: With a hard CPU limit of 20 seconds and eight threads (and an itimer
	running), the modified kernel was very slightly favored in that while
	it killed the process in 19.997 seconds of CPU time (5.002 seconds of
	wall time), only .003 seconds of that was system time, the rest was
	user time.  The unmodified kernel killed the process in 20.001 seconds
	of CPU (5.014 seconds of wall time) of which .016 seconds was system
	time.  Really, though, the results were too close to call.  The results
	were essentially the same with no itimer running.

Case 5: With a soft limit of 20 seconds and a hard limit of 2000 seconds
	(where the hard limit would never be reached) and an itimer running,
	the modified kernel exhibited worse tick accuracy than the unmodified
	kernel: .050 seconds/tick versus .028 seconds/tick.  Otherwise,
	performance was almost indistinguishable.  With no itimer running this
	test exhibited virtually identical behavior and times in both cases.

In times past I did some limited performance testing.  those results are below.

On a four-cpu Opteron system without this fix, a sixteen-thread test executed
in 3569.991 seconds, of which user was 3568.435s and system was 1.556s.  On
the same system with the fix, user and elapsed time were about the same, but
system time dropped to 0.007 seconds.  Performance with eight, four and one
thread were comparable.  Interestingly, the timer ticks with the fix seemed
more accurate:  The sixteen-thread test with the fix received 149543 ticks
for 0.024 seconds per tick, while the same test without the fix received 58720
for 0.061 seconds per tick.  Both cases were configured for an interval of
0.01 seconds.  Again, the other tests were comparable.  Each thread in this
test computed the primes up to 25,000,000.

I also did a test with a large number of threads, 100,000 threads, which is
impossible without the fix.  In this case each thread computed the primes only
up to 10,000 (to make the runtime manageable).  System time dominated, at
1546.968 seconds out of a total 2176.906 seconds (giving a user time of
629.938s).  It received 147651 ticks for 0.015 seconds per tick, still quite
accurate.  There is obviously no comparable test without the fix.

Signed-off-by: Frank Mayhar <fmayhar@google.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 fs/binfmt_elf.c              |  19 +-
 fs/proc/array.c              |   8 +-
 include/linux/posix-timers.h |   2 +
 include/linux/sched.h        | 257 +++++++++++++++++++++--
 include/linux/time.h         |   3 +
 kernel/compat.c              |  53 ++---
 kernel/exit.c                |  19 +-
 kernel/fork.c                |  88 ++++----
 kernel/itimer.c              |  33 +--
 kernel/posix-cpu-timers.c    | 471 +++++++++++++++++++++++--------------------
 kernel/sched.c               |  53 ++++-
 kernel/sched_fair.c          |   1 +
 kernel/sched_rt.c            |   4 +-
 kernel/signal.c              |   8 +-
 kernel/sys.c                 |  75 +++----
 security/selinux/hooks.c     |   9 +-
 16 files changed, 677 insertions(+), 426 deletions(-)

(limited to 'security')

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 655ed8d30a86..a8635f637038 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1333,20 +1333,15 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
 	prstatus->pr_pgrp = task_pgrp_vnr(p);
 	prstatus->pr_sid = task_session_vnr(p);
 	if (thread_group_leader(p)) {
+		struct task_cputime cputime;
+
 		/*
-		 * This is the record for the group leader.  Add in the
-		 * cumulative times of previous dead threads.  This total
-		 * won't include the time of each live thread whose state
-		 * is included in the core dump.  The final total reported
-		 * to our parent process when it calls wait4 will include
-		 * those sums as well as the little bit more time it takes
-		 * this and each other thread to finish dying after the
-		 * core dump synchronization phase.
+		 * This is the record for the group leader.  It shows the
+		 * group-wide total, not its individual thread total.
 		 */
-		cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
-				   &prstatus->pr_utime);
-		cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
-				   &prstatus->pr_stime);
+		thread_group_cputime(p, &cputime);
+		cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
+		cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
 	} else {
 		cputime_to_timeval(p->utime, &prstatus->pr_utime);
 		cputime_to_timeval(p->stime, &prstatus->pr_stime);
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 71c9be59c9c2..933953c4e407 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -395,20 +395,20 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 
 		/* add up live thread stats at the group level */
 		if (whole) {
+			struct task_cputime cputime;
 			struct task_struct *t = task;
 			do {
 				min_flt += t->min_flt;
 				maj_flt += t->maj_flt;
-				utime = cputime_add(utime, task_utime(t));
-				stime = cputime_add(stime, task_stime(t));
 				gtime = cputime_add(gtime, task_gtime(t));
 				t = next_thread(t);
 			} while (t != task);
 
 			min_flt += sig->min_flt;
 			maj_flt += sig->maj_flt;
-			utime = cputime_add(utime, sig->utime);
-			stime = cputime_add(stime, sig->stime);
+			thread_group_cputime(task, &cputime);
+			utime = cputime.utime;
+			stime = cputime.stime;
 			gtime = cputime_add(gtime, sig->gtime);
 		}
 
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index a7dd38f30ade..f9d8e9e94e9b 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -115,4 +115,6 @@ void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx,
 
 long clock_nanosleep_restart(struct restart_block *restart_block);
 
+void update_rlimit_cpu(unsigned long rlim_new);
+
 #endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3d9120c5ad15..26d7a5f2d0ba 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -425,6 +425,45 @@ struct pacct_struct {
 	unsigned long		ac_minflt, ac_majflt;
 };
 
+/**
+ * struct task_cputime - collected CPU time counts
+ * @utime:		time spent in user mode, in &cputime_t units
+ * @stime:		time spent in kernel mode, in &cputime_t units
+ * @sum_exec_runtime:	total time spent on the CPU, in nanoseconds
+ * 
+ * This structure groups together three kinds of CPU time that are
+ * tracked for threads and thread groups.  Most things considering
+ * CPU time want to group these counts together and treat all three
+ * of them in parallel.
+ */
+struct task_cputime {
+	cputime_t utime;
+	cputime_t stime;
+	unsigned long long sum_exec_runtime;
+};
+/* Alternate field names when used to cache expirations. */
+#define prof_exp	stime
+#define virt_exp	utime
+#define sched_exp	sum_exec_runtime
+
+/**
+ * struct thread_group_cputime - thread group interval timer counts
+ * @totals:		thread group interval timers; substructure for
+ *			uniprocessor kernel, per-cpu for SMP kernel.
+ *
+ * This structure contains the version of task_cputime, above, that is
+ * used for thread group CPU clock calculations.
+ */
+#ifdef CONFIG_SMP
+struct thread_group_cputime {
+	struct task_cputime *totals;
+};
+#else
+struct thread_group_cputime {
+	struct task_cputime totals;
+};
+#endif
+
 /*
  * NOTE! "signal_struct" does not have it's own
  * locking, because a shared signal_struct always
@@ -470,6 +509,17 @@ struct signal_struct {
 	cputime_t it_prof_expires, it_virt_expires;
 	cputime_t it_prof_incr, it_virt_incr;
 
+	/*
+	 * Thread group totals for process CPU clocks.
+	 * See thread_group_cputime(), et al, for details.
+	 */
+	struct thread_group_cputime cputime;
+
+	/* Earliest-expiration cache. */
+	struct task_cputime cputime_expires;
+
+	struct list_head cpu_timers[3];
+
 	/* job control IDs */
 
 	/*
@@ -500,7 +550,7 @@ struct signal_struct {
 	 * Live threads maintain their own counters and add to these
 	 * in __exit_signal, except for the group leader.
 	 */
-	cputime_t utime, stime, cutime, cstime;
+	cputime_t cutime, cstime;
 	cputime_t gtime;
 	cputime_t cgtime;
 	unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
@@ -508,14 +558,6 @@ struct signal_struct {
 	unsigned long inblock, oublock, cinblock, coublock;
 	struct task_io_accounting ioac;
 
-	/*
-	 * Cumulative ns of scheduled CPU time for dead threads in the
-	 * group, not including a zombie group leader.  (This only differs
-	 * from jiffies_to_ns(utime + stime) if sched_clock uses something
-	 * other than jiffies.)
-	 */
-	unsigned long long sum_sched_runtime;
-
 	/*
 	 * We don't bother to synchronize most readers of this at all,
 	 * because there is no reader checking a limit that actually needs
@@ -527,8 +569,6 @@ struct signal_struct {
 	 */
 	struct rlimit rlim[RLIM_NLIMITS];
 
-	struct list_head cpu_timers[3];
-
 	/* keep the process-shared keyrings here so that they do the right
 	 * thing in threads created with CLONE_THREAD */
 #ifdef CONFIG_KEYS
@@ -1134,8 +1174,7 @@ struct task_struct {
 /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
 	unsigned long min_flt, maj_flt;
 
-  	cputime_t it_prof_expires, it_virt_expires;
-	unsigned long long it_sched_expires;
+	struct task_cputime cputime_expires;
 	struct list_head cpu_timers[3];
 
 /* process credentials */
@@ -1585,6 +1624,7 @@ extern unsigned long long cpu_clock(int cpu);
 
 extern unsigned long long
 task_sched_runtime(struct task_struct *task);
+extern unsigned long long thread_group_sched_runtime(struct task_struct *task);
 
 /* sched_exec is called by processes performing an exec */
 #ifdef CONFIG_SMP
@@ -2081,6 +2121,197 @@ static inline int spin_needbreak(spinlock_t *lock)
 #endif
 }
 
+/*
+ * Thread group CPU time accounting.
+ */
+#ifdef CONFIG_SMP
+
+extern int thread_group_cputime_alloc_smp(struct task_struct *);
+extern void thread_group_cputime_smp(struct task_struct *, struct task_cputime *);
+
+static inline void thread_group_cputime_init(struct signal_struct *sig)
+{
+	sig->cputime.totals = NULL;
+}
+
+static inline int thread_group_cputime_clone_thread(struct task_struct *curr,
+						    struct task_struct *new)
+{
+	if (curr->signal->cputime.totals)
+		return 0;
+	return thread_group_cputime_alloc_smp(curr);
+}
+
+static inline void thread_group_cputime_free(struct signal_struct *sig)
+{
+	free_percpu(sig->cputime.totals);
+}
+
+/**
+ * thread_group_cputime - Sum the thread group time fields across all CPUs.
+ *
+ * This is a wrapper for the real routine, thread_group_cputime_smp().  See
+ * that routine for details.
+ */
+static inline void thread_group_cputime(
+	struct task_struct *tsk,
+	struct task_cputime *times)
+{
+	thread_group_cputime_smp(tsk, times);
+}
+
+/**
+ * thread_group_cputime_account_user - Maintain utime for a thread group.
+ *
+ * @tgtimes:	Pointer to thread_group_cputime structure.
+ * @cputime:	Time value by which to increment the utime field of that
+ *		structure.
+ *
+ * If thread group time is being maintained, get the structure for the
+ * running CPU and update the utime field there.
+ */
+static inline void thread_group_cputime_account_user(
+	struct thread_group_cputime *tgtimes,
+	cputime_t cputime)
+{
+	if (tgtimes->totals) {
+		struct task_cputime *times;
+
+		times = per_cpu_ptr(tgtimes->totals, get_cpu());
+		times->utime = cputime_add(times->utime, cputime);
+		put_cpu_no_resched();
+	}
+}
+
+/**
+ * thread_group_cputime_account_system - Maintain stime for a thread group.
+ *
+ * @tgtimes:	Pointer to thread_group_cputime structure.
+ * @cputime:	Time value by which to increment the stime field of that
+ *		structure.
+ *
+ * If thread group time is being maintained, get the structure for the
+ * running CPU and update the stime field there.
+ */
+static inline void thread_group_cputime_account_system(
+	struct thread_group_cputime *tgtimes,
+	cputime_t cputime)
+{
+	if (tgtimes->totals) {
+		struct task_cputime *times;
+
+		times = per_cpu_ptr(tgtimes->totals, get_cpu());
+		times->stime = cputime_add(times->stime, cputime);
+		put_cpu_no_resched();
+	}
+}
+
+/**
+ * thread_group_cputime_account_exec_runtime - Maintain exec runtime for a
+ *						thread group.
+ *
+ * @tgtimes:	Pointer to thread_group_cputime structure.
+ * @ns:		Time value by which to increment the sum_exec_runtime field
+ *		of that structure.
+ *
+ * If thread group time is being maintained, get the structure for the
+ * running CPU and update the sum_exec_runtime field there.
+ */
+static inline void thread_group_cputime_account_exec_runtime(
+	struct thread_group_cputime *tgtimes,
+	unsigned long long ns)
+{
+	if (tgtimes->totals) {
+		struct task_cputime *times;
+
+		times = per_cpu_ptr(tgtimes->totals, get_cpu());
+		times->sum_exec_runtime += ns;
+		put_cpu_no_resched();
+	}
+}
+
+#else /* CONFIG_SMP */
+
+static inline void thread_group_cputime_init(struct signal_struct *sig)
+{
+	sig->cputime.totals.utime = cputime_zero;
+	sig->cputime.totals.stime = cputime_zero;
+	sig->cputime.totals.sum_exec_runtime = 0;
+}
+
+static inline int thread_group_cputime_alloc(struct task_struct *tsk)
+{
+	return 0;
+}
+
+static inline void thread_group_cputime_free(struct signal_struct *sig)
+{
+}
+
+static inline int thread_group_cputime_clone_thread(struct task_struct *curr,
+						     struct task_struct *tsk)
+{
+}
+
+static inline void thread_group_cputime(struct task_struct *tsk,
+					 struct task_cputime *cputime)
+{
+	*cputime = tsk->signal->cputime.totals;
+}
+
+static inline void thread_group_cputime_account_user(
+	struct thread_group_cputime *tgtimes,
+	cputime_t cputime)
+{
+	tgtimes->totals->utime = cputime_add(tgtimes->totals->utime, cputime);
+}
+
+static inline void thread_group_cputime_account_system(
+	struct thread_group_cputime *tgtimes,
+	cputime_t cputime)
+{
+	tgtimes->totals->stime = cputime_add(tgtimes->totals->stime, cputime);
+}
+
+static inline void thread_group_cputime_account_exec_runtime(
+	struct thread_group_cputime *tgtimes,
+	unsigned long long ns)
+{
+	tgtimes->totals->sum_exec_runtime += ns;
+}
+
+#endif /* CONFIG_SMP */
+
+static inline void account_group_user_time(struct task_struct *tsk,
+					    cputime_t cputime)
+{
+	struct signal_struct *sig;
+
+	sig = tsk->signal;
+	if (likely(sig))
+		thread_group_cputime_account_user(&sig->cputime, cputime);
+}
+
+static inline void account_group_system_time(struct task_struct *tsk,
+					      cputime_t cputime)
+{
+	struct signal_struct *sig;
+
+	sig = tsk->signal;
+	if (likely(sig))
+		thread_group_cputime_account_system(&sig->cputime, cputime);
+}
+
+static inline void account_group_exec_runtime(struct task_struct *tsk,
+					       unsigned long long ns)
+{
+	struct signal_struct *sig;
+
+	sig = tsk->signal;
+	if (likely(sig))
+		thread_group_cputime_account_exec_runtime(&sig->cputime, ns);
+}
+
 /*
  * Reevaluate whether the task has signals pending delivery.
  * Wake the task if so.
diff --git a/include/linux/time.h b/include/linux/time.h
index e15206a7e82e..1b70b3c293e9 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -125,6 +125,9 @@ extern int timekeeping_valid_for_hres(void);
 extern void update_wall_time(void);
 extern void update_xtime_cache(u64 nsec);
 
+struct tms;
+extern void do_sys_times(struct tms *);
+
 /**
  * timespec_to_ns - Convert timespec to nanoseconds
  * @ts:		pointer to the timespec variable to be converted
diff --git a/kernel/compat.c b/kernel/compat.c
index 32c254a8ab9a..72650e39b3e6 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -23,6 +23,7 @@
 #include <linux/timex.h>
 #include <linux/migrate.h>
 #include <linux/posix-timers.h>
+#include <linux/times.h>
 
 #include <asm/uaccess.h>
 
@@ -150,49 +151,23 @@ asmlinkage long compat_sys_setitimer(int which,
 	return 0;
 }
 
+static compat_clock_t clock_t_to_compat_clock_t(clock_t x)
+{
+	return compat_jiffies_to_clock_t(clock_t_to_jiffies(x));
+}
+
 asmlinkage long compat_sys_times(struct compat_tms __user *tbuf)
 {
-	/*
-	 *	In the SMP world we might just be unlucky and have one of
-	 *	the times increment as we use it. Since the value is an
-	 *	atomically safe type this is just fine. Conceptually its
-	 *	as if the syscall took an instant longer to occur.
-	 */
 	if (tbuf) {
+		struct tms tms;
 		struct compat_tms tmp;
-		struct task_struct *tsk = current;
-		struct task_struct *t;
-		cputime_t utime, stime, cutime, cstime;
-
-		read_lock(&tasklist_lock);
-		utime = tsk->signal->utime;
-		stime = tsk->signal->stime;
-		t = tsk;
-		do {
-			utime = cputime_add(utime, t->utime);
-			stime = cputime_add(stime, t->stime);
-			t = next_thread(t);
-		} while (t != tsk);
-
-		/*
-		 * While we have tasklist_lock read-locked, no dying thread
-		 * can be updating current->signal->[us]time.  Instead,
-		 * we got their counts included in the live thread loop.
-		 * However, another thread can come in right now and
-		 * do a wait call that updates current->signal->c[us]time.
-		 * To make sure we always see that pair updated atomically,
-		 * we take the siglock around fetching them.
-		 */
-		spin_lock_irq(&tsk->sighand->siglock);
-		cutime = tsk->signal->cutime;
-		cstime = tsk->signal->cstime;
-		spin_unlock_irq(&tsk->sighand->siglock);
-		read_unlock(&tasklist_lock);
-
-		tmp.tms_utime = compat_jiffies_to_clock_t(cputime_to_jiffies(utime));
-		tmp.tms_stime = compat_jiffies_to_clock_t(cputime_to_jiffies(stime));
-		tmp.tms_cutime = compat_jiffies_to_clock_t(cputime_to_jiffies(cutime));
-		tmp.tms_cstime = compat_jiffies_to_clock_t(cputime_to_jiffies(cstime));
+
+		do_sys_times(&tms);
+		/* Convert our struct tms to the compat version. */
+		tmp.tms_utime = clock_t_to_compat_clock_t(tms.tms_utime);
+		tmp.tms_stime = clock_t_to_compat_clock_t(tms.tms_stime);
+		tmp.tms_cutime = clock_t_to_compat_clock_t(tms.tms_cutime);
+		tmp.tms_cstime = clock_t_to_compat_clock_t(tms.tms_cstime);
 		if (copy_to_user(tbuf, &tmp, sizeof(tmp)))
 			return -EFAULT;
 	}
diff --git a/kernel/exit.c b/kernel/exit.c
index 16395644a98f..40036ac04271 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -112,8 +112,6 @@ static void __exit_signal(struct task_struct *tsk)
 		 * We won't ever get here for the group leader, since it
 		 * will have been the last reference on the signal_struct.
 		 */
-		sig->utime = cputime_add(sig->utime, task_utime(tsk));
-		sig->stime = cputime_add(sig->stime, task_stime(tsk));
 		sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
 		sig->min_flt += tsk->min_flt;
 		sig->maj_flt += tsk->maj_flt;
@@ -122,7 +120,6 @@ static void __exit_signal(struct task_struct *tsk)
 		sig->inblock += task_io_get_inblock(tsk);
 		sig->oublock += task_io_get_oublock(tsk);
 		task_io_accounting_add(&sig->ioac, &tsk->ioac);
-		sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
 		sig = NULL; /* Marker for below. */
 	}
 
@@ -1294,6 +1291,7 @@ static int wait_task_zombie(struct task_struct *p, int options,
 	if (likely(!traced)) {
 		struct signal_struct *psig;
 		struct signal_struct *sig;
+		struct task_cputime cputime;
 
 		/*
 		 * The resource counters for the group leader are in its
@@ -1309,20 +1307,23 @@ static int wait_task_zombie(struct task_struct *p, int options,
 		 * need to protect the access to p->parent->signal fields,
 		 * as other threads in the parent group can be right
 		 * here reaping other children at the same time.
+		 *
+		 * We use thread_group_cputime() to get times for the thread
+		 * group, which consolidates times for all threads in the
+		 * group including the group leader.
 		 */
 		spin_lock_irq(&p->parent->sighand->siglock);
 		psig = p->parent->signal;
 		sig = p->signal;
+		thread_group_cputime(p, &cputime);
 		psig->cutime =
 			cputime_add(psig->cutime,
-			cputime_add(p->utime,
-			cputime_add(sig->utime,
-				    sig->cutime)));
+			cputime_add(cputime.utime,
+				    sig->cutime));
 		psig->cstime =
 			cputime_add(psig->cstime,
-			cputime_add(p->stime,
-			cputime_add(sig->stime,
-				    sig->cstime)));
+			cputime_add(cputime.stime,
+				    sig->cstime));
 		psig->cgtime =
 			cputime_add(psig->cgtime,
 			cputime_add(p->gtime,
diff --git a/kernel/fork.c b/kernel/fork.c
index 7ce2ebe84796..a8ac2efb8e30 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -759,15 +759,44 @@ void __cleanup_sighand(struct sighand_struct *sighand)
 		kmem_cache_free(sighand_cachep, sighand);
 }
 
+
+/*
+ * Initialize POSIX timer handling for a thread group.
+ */
+static void posix_cpu_timers_init_group(struct signal_struct *sig)
+{
+	/* Thread group counters. */
+	thread_group_cputime_init(sig);
+
+	/* Expiration times and increments. */
+	sig->it_virt_expires = cputime_zero;
+	sig->it_virt_incr = cputime_zero;
+	sig->it_prof_expires = cputime_zero;
+	sig->it_prof_incr = cputime_zero;
+
+	/* Cached expiration times. */
+	sig->cputime_expires.prof_exp = cputime_zero;
+	sig->cputime_expires.virt_exp = cputime_zero;
+	sig->cputime_expires.sched_exp = 0;
+
+	/* The timer lists. */
+	INIT_LIST_HEAD(&sig->cpu_timers[0]);
+	INIT_LIST_HEAD(&sig->cpu_timers[1]);
+	INIT_LIST_HEAD(&sig->cpu_timers[2]);
+}
+
 static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 {
 	struct signal_struct *sig;
 	int ret;
 
 	if (clone_flags & CLONE_THREAD) {
-		atomic_inc(&current->signal->count);
-		atomic_inc(&current->signal->live);
-		return 0;
+		ret = thread_group_cputime_clone_thread(current, tsk);
+		if (likely(!ret)) {
+			atomic_inc(&current->signal->count);
+			atomic_inc(&current->signal->live);
+		}
+		return ret;
 	}
 	sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
 	tsk->signal = sig;
@@ -795,15 +824,10 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	sig->it_real_incr.tv64 = 0;
 	sig->real_timer.function = it_real_fn;
 
-	sig->it_virt_expires = cputime_zero;
-	sig->it_virt_incr = cputime_zero;
-	sig->it_prof_expires = cputime_zero;
-	sig->it_prof_incr = cputime_zero;
-
 	sig->leader = 0;	/* session leadership doesn't inherit */
 	sig->tty_old_pgrp = NULL;
 
-	sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
+	sig->cutime = sig->cstime = cputime_zero;
 	sig->gtime = cputime_zero;
 	sig->cgtime = cputime_zero;
 	sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
@@ -820,14 +844,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
 	task_unlock(current->group_leader);
 
-	if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
-		/*
-		 * New sole thread in the process gets an expiry time
-		 * of the whole CPU time limit.
-		 */
-		tsk->it_prof_expires =
-			secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur);
-	}
+	posix_cpu_timers_init_group(sig);
+
 	acct_init_pacct(&sig->pacct);
 
 	tty_audit_fork(sig);
@@ -837,6 +855,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 
 void __cleanup_signal(struct signal_struct *sig)
 {
+	thread_group_cputime_free(sig);
 	exit_thread_group_keys(sig);
 	kmem_cache_free(signal_cachep, sig);
 }
@@ -885,6 +904,19 @@ void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
 }
 #endif /* CONFIG_MM_OWNER */
 
+/*
+ * Initialize POSIX timer handling for a single task.
+ */
+static void posix_cpu_timers_init(struct task_struct *tsk)
+{
+	tsk->cputime_expires.prof_exp = cputime_zero;
+	tsk->cputime_expires.virt_exp = cputime_zero;
+	tsk->cputime_expires.sched_exp = 0;
+	INIT_LIST_HEAD(&tsk->cpu_timers[0]);
+	INIT_LIST_HEAD(&tsk->cpu_timers[1]);
+	INIT_LIST_HEAD(&tsk->cpu_timers[2]);
+}
+
 /*
  * This creates a new process as a copy of the old one,
  * but does not actually start it yet.
@@ -995,12 +1027,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	task_io_accounting_init(&p->ioac);
 	acct_clear_integrals(p);
 
-	p->it_virt_expires = cputime_zero;
-	p->it_prof_expires = cputime_zero;
-	p->it_sched_expires = 0;
-	INIT_LIST_HEAD(&p->cpu_timers[0]);
-	INIT_LIST_HEAD(&p->cpu_timers[1]);
-	INIT_LIST_HEAD(&p->cpu_timers[2]);
+	posix_cpu_timers_init(p);
 
 	p->lock_depth = -1;		/* -1 = no lock */
 	do_posix_clock_monotonic_gettime(&p->start_time);
@@ -1201,21 +1228,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	if (clone_flags & CLONE_THREAD) {
 		p->group_leader = current->group_leader;
 		list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group);
-
-		if (!cputime_eq(current->signal->it_virt_expires,
-				cputime_zero) ||
-		    !cputime_eq(current->signal->it_prof_expires,
-				cputime_zero) ||
-		    current->signal->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY ||
-		    !list_empty(&current->signal->cpu_timers[0]) ||
-		    !list_empty(&current->signal->cpu_timers[1]) ||
-		    !list_empty(&current->signal->cpu_timers[2])) {
-			/*
-			 * Have child wake up on its first tick to check
-			 * for process CPU timers.
-			 */
-			p->it_prof_expires = jiffies_to_cputime(1);
-		}
 	}
 
 	if (likely(p->pid)) {
diff --git a/kernel/itimer.c b/kernel/itimer.c
index ab982747d9bd..db7c358b9a02 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -55,17 +55,15 @@ int do_getitimer(int which, struct itimerval *value)
 		spin_unlock_irq(&tsk->sighand->siglock);
 		break;
 	case ITIMER_VIRTUAL:
-		read_lock(&tasklist_lock);
 		spin_lock_irq(&tsk->sighand->siglock);
 		cval = tsk->signal->it_virt_expires;
 		cinterval = tsk->signal->it_virt_incr;
 		if (!cputime_eq(cval, cputime_zero)) {
-			struct task_struct *t = tsk;
-			cputime_t utime = tsk->signal->utime;
-			do {
-				utime = cputime_add(utime, t->utime);
-				t = next_thread(t);
-			} while (t != tsk);
+			struct task_cputime cputime;
+			cputime_t utime;
+
+			thread_group_cputime(tsk, &cputime);
+			utime = cputime.utime;
 			if (cputime_le(cval, utime)) { /* about to fire */
 				cval = jiffies_to_cputime(1);
 			} else {
@@ -73,25 +71,19 @@ int do_getitimer(int which, struct itimerval *value)
 			}
 		}
 		spin_unlock_irq(&tsk->sighand->siglock);
-		read_unlock(&tasklist_lock);
 		cputime_to_timeval(cval, &value->it_value);
 		cputime_to_timeval(cinterval, &value->it_interval);
 		break;
 	case ITIMER_PROF:
-		read_lock(&tasklist_lock);
 		spin_lock_irq(&tsk->sighand->siglock);
 		cval = tsk->signal->it_prof_expires;
 		cinterval = tsk->signal->it_prof_incr;
 		if (!cputime_eq(cval, cputime_zero)) {
-			struct task_struct *t = tsk;
-			cputime_t ptime = cputime_add(tsk->signal->utime,
-						      tsk->signal->stime);
-			do {
-				ptime = cputime_add(ptime,
-						    cputime_add(t->utime,
-								t->stime));
-				t = next_thread(t);
-			} while (t != tsk);
+			struct task_cputime times;
+			cputime_t ptime;
+
+			thread_group_cputime(tsk, &times);
+			ptime = cputime_add(times.utime, times.stime);
 			if (cputime_le(cval, ptime)) { /* about to fire */
 				cval = jiffies_to_cputime(1);
 			} else {
@@ -99,7 +91,6 @@ int do_getitimer(int which, struct itimerval *value)
 			}
 		}
 		spin_unlock_irq(&tsk->sighand->siglock);
-		read_unlock(&tasklist_lock);
 		cputime_to_timeval(cval, &value->it_value);
 		cputime_to_timeval(cinterval, &value->it_interval);
 		break;
@@ -185,7 +176,6 @@ again:
 	case ITIMER_VIRTUAL:
 		nval = timeval_to_cputime(&value->it_value);
 		ninterval = timeval_to_cputime(&value->it_interval);
-		read_lock(&tasklist_lock);
 		spin_lock_irq(&tsk->sighand->siglock);
 		cval = tsk->signal->it_virt_expires;
 		cinterval = tsk->signal->it_virt_incr;
@@ -200,7 +190,6 @@ again:
 		tsk->signal->it_virt_expires = nval;
 		tsk->signal->it_virt_incr = ninterval;
 		spin_unlock_irq(&tsk->sighand->siglock);
-		read_unlock(&tasklist_lock);
 		if (ovalue) {
 			cputime_to_timeval(cval, &ovalue->it_value);
 			cputime_to_timeval(cinterval, &ovalue->it_interval);
@@ -209,7 +198,6 @@ again:
 	case ITIMER_PROF:
 		nval = timeval_to_cputime(&value->it_value);
 		ninterval = timeval_to_cputime(&value->it_interval);
-		read_lock(&tasklist_lock);
 		spin_lock_irq(&tsk->sighand->siglock);
 		cval = tsk->signal->it_prof_expires;
 		cinterval = tsk->signal->it_prof_incr;
@@ -224,7 +212,6 @@ again:
 		tsk->signal->it_prof_expires = nval;
 		tsk->signal->it_prof_incr = ninterval;
 		spin_unlock_irq(&tsk->sighand->siglock);
-		read_unlock(&tasklist_lock);
 		if (ovalue) {
 			cputime_to_timeval(cval, &ovalue->it_value);
 			cputime_to_timeval(cinterval, &ovalue->it_interval);
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index c42a03aef36f..dba1c334c3e8 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -8,6 +8,99 @@
 #include <linux/math64.h>
 #include <asm/uaccess.h>
 
+#ifdef CONFIG_SMP
+/*
+ * Allocate the thread_group_cputime structure appropriately for SMP kernels
+ * and fill in the current values of the fields.  Called from copy_signal()
+ * via thread_group_cputime_clone_thread() when adding a second or subsequent
+ * thread to a thread group.  Assumes interrupts are enabled when called.
+ */
+int thread_group_cputime_alloc_smp(struct task_struct *tsk)
+{
+	struct signal_struct *sig = tsk->signal;
+	struct task_cputime *cputime;
+
+	/*
+	 * If we have multiple threads and we don't already have a
+	 * per-CPU task_cputime struct, allocate one and fill it in with
+	 * the times accumulated so far.
+	 */
+	if (sig->cputime.totals)
+		return 0;
+	cputime = alloc_percpu(struct task_cputime);
+	if (cputime == NULL)
+		return -ENOMEM;
+	read_lock(&tasklist_lock);
+	spin_lock_irq(&tsk->sighand->siglock);
+	if (sig->cputime.totals) {
+		spin_unlock_irq(&tsk->sighand->siglock);
+		read_unlock(&tasklist_lock);
+		free_percpu(cputime);
+		return 0;
+	}
+	sig->cputime.totals = cputime;
+	cputime = per_cpu_ptr(sig->cputime.totals, get_cpu());
+	cputime->utime = tsk->utime;
+	cputime->stime = tsk->stime;
+	cputime->sum_exec_runtime = tsk->se.sum_exec_runtime;
+	put_cpu_no_resched();
+	spin_unlock_irq(&tsk->sighand->siglock);
+	read_unlock(&tasklist_lock);
+	return 0;
+}
+
+/**
+ * thread_group_cputime_smp - Sum the thread group time fields across all CPUs.
+ *
+ * @tsk:	The task we use to identify the thread group.
+ * @times:	task_cputime structure in which we return the summed fields.
+ *
+ * Walk the list of CPUs to sum the per-CPU time fields in the thread group
+ * time structure.
+ */
+void thread_group_cputime_smp(
+	struct task_struct *tsk,
+	struct task_cputime *times)
+{
+	struct signal_struct *sig;
+	int i;
+	struct task_cputime *tot;
+
+	sig = tsk->signal;
+	if (unlikely(!sig) || !sig->cputime.totals) {
+		times->utime = tsk->utime;
+		times->stime = tsk->stime;
+		times->sum_exec_runtime = tsk->se.sum_exec_runtime;
+		return;
+	}
+	times->stime = times->utime = cputime_zero;
+	times->sum_exec_runtime = 0;
+	for_each_possible_cpu(i) {
+		tot = per_cpu_ptr(tsk->signal->cputime.totals, i);
+		times->utime = cputime_add(times->utime, tot->utime);
+		times->stime = cputime_add(times->stime, tot->stime);
+		times->sum_exec_runtime += tot->sum_exec_runtime;
+	}
+}
+
+#endif /* CONFIG_SMP */
+
+/*
+ * Called after updating RLIMIT_CPU to set timer expiration if necessary.
+ */
+void update_rlimit_cpu(unsigned long rlim_new)
+{
+	cputime_t cputime;
+
+	cputime = secs_to_cputime(rlim_new);
+	if (cputime_eq(current->signal->it_prof_expires, cputime_zero) ||
+            cputime_lt(current->signal->it_prof_expires, cputime)) {
+		spin_lock_irq(&current->sighand->siglock);
+		set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
+		spin_unlock_irq(&current->sighand->siglock);
+	}
+}
+
 static int check_clock(const clockid_t which_clock)
 {
 	int error = 0;
@@ -158,10 +251,6 @@ static inline cputime_t virt_ticks(struct task_struct *p)
 {
 	return p->utime;
 }
-static inline unsigned long long sched_ns(struct task_struct *p)
-{
-	return task_sched_runtime(p);
-}
 
 int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp)
 {
@@ -211,7 +300,7 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
 		cpu->cpu = virt_ticks(p);
 		break;
 	case CPUCLOCK_SCHED:
-		cpu->sched = sched_ns(p);
+		cpu->sched = task_sched_runtime(p);
 		break;
 	}
 	return 0;
@@ -226,31 +315,20 @@ static int cpu_clock_sample_group_locked(unsigned int clock_idx,
 					 struct task_struct *p,
 					 union cpu_time_count *cpu)
 {
-	struct task_struct *t = p;
- 	switch (clock_idx) {
+	struct task_cputime cputime;
+
+	thread_group_cputime(p, &cputime);
+	switch (clock_idx) {
 	default:
 		return -EINVAL;
 	case CPUCLOCK_PROF:
-		cpu->cpu = cputime_add(p->signal->utime, p->signal->stime);
-		do {
-			cpu->cpu = cputime_add(cpu->cpu, prof_ticks(t));
-			t = next_thread(t);
-		} while (t != p);
+		cpu->cpu = cputime_add(cputime.utime, cputime.stime);
 		break;
 	case CPUCLOCK_VIRT:
-		cpu->cpu = p->signal->utime;
-		do {
-			cpu->cpu = cputime_add(cpu->cpu, virt_ticks(t));
-			t = next_thread(t);
-		} while (t != p);
+		cpu->cpu = cputime.utime;
 		break;
 	case CPUCLOCK_SCHED:
-		cpu->sched = p->signal->sum_sched_runtime;
-		/* Add in each other live thread.  */
-		while ((t = next_thread(t)) != p) {
-			cpu->sched += t->se.sum_exec_runtime;
-		}
-		cpu->sched += sched_ns(p);
+		cpu->sched = thread_group_sched_runtime(p);
 		break;
 	}
 	return 0;
@@ -471,80 +549,11 @@ void posix_cpu_timers_exit(struct task_struct *tsk)
 }
 void posix_cpu_timers_exit_group(struct task_struct *tsk)
 {
-	cleanup_timers(tsk->signal->cpu_timers,
-		       cputime_add(tsk->utime, tsk->signal->utime),
-		       cputime_add(tsk->stime, tsk->signal->stime),
-		     tsk->se.sum_exec_runtime + tsk->signal->sum_sched_runtime);
-}
-
-
-/*
- * Set the expiry times of all the threads in the process so one of them
- * will go off before the process cumulative expiry total is reached.
- */
-static void process_timer_rebalance(struct task_struct *p,
-				    unsigned int clock_idx,
-				    union cpu_time_count expires,
-				    union cpu_time_count val)
-{
-	cputime_t ticks, left;
-	unsigned long long ns, nsleft;
- 	struct task_struct *t = p;
-	unsigned int nthreads = atomic_read(&p->signal->live);
-
-	if (!nthreads)
-		return;
+	struct task_cputime cputime;
 
-	switch (clock_idx) {
-	default:
-		BUG();
-		break;
-	case CPUCLOCK_PROF:
-		left = cputime_div_non_zero(cputime_sub(expires.cpu, val.cpu),
-				       nthreads);
-		do {
-			if (likely(!(t->flags & PF_EXITING))) {
-				ticks = cputime_add(prof_ticks(t), left);
-				if (cputime_eq(t->it_prof_expires,
-					       cputime_zero) ||
-				    cputime_gt(t->it_prof_expires, ticks)) {
-					t->it_prof_expires = ticks;
-				}
-			}
-			t = next_thread(t);
-		} while (t != p);
-		break;
-	case CPUCLOCK_VIRT:
-		left = cputime_div_non_zero(cputime_sub(expires.cpu, val.cpu),
-				       nthreads);
-		do {
-			if (likely(!(t->flags & PF_EXITING))) {
-				ticks = cputime_add(virt_ticks(t), left);
-				if (cputime_eq(t->it_virt_expires,
-					       cputime_zero) ||
-				    cputime_gt(t->it_virt_expires, ticks)) {
-					t->it_virt_expires = ticks;
-				}
-			}
-			t = next_thread(t);
-		} while (t != p);
-		break;
-	case CPUCLOCK_SCHED:
-		nsleft = expires.sched - val.sched;
-		do_div(nsleft, nthreads);
-		nsleft = max_t(unsigned long long, nsleft, 1);
-		do {
-			if (likely(!(t->flags & PF_EXITING))) {
-				ns = t->se.sum_exec_runtime + nsleft;
-				if (t->it_sched_expires == 0 ||
-				    t->it_sched_expires > ns) {
-					t->it_sched_expires = ns;
-				}
-			}
-			t = next_thread(t);
-		} while (t != p);
-		break;
-	}
+	thread_group_cputime(tsk, &cputime);
+	cleanup_timers(tsk->signal->cpu_timers,
+		       cputime.utime, cputime.stime, cputime.sum_exec_runtime);
 }
 
 static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now)
@@ -608,29 +617,32 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
 			default:
 				BUG();
 			case CPUCLOCK_PROF:
-				if (cputime_eq(p->it_prof_expires,
+				if (cputime_eq(p->cputime_expires.prof_exp,
 					       cputime_zero) ||
-				    cputime_gt(p->it_prof_expires,
+				    cputime_gt(p->cputime_expires.prof_exp,
 					       nt->expires.cpu))
-					p->it_prof_expires = nt->expires.cpu;
+					p->cputime_expires.prof_exp =
+						nt->expires.cpu;
 				break;
 			case CPUCLOCK_VIRT:
-				if (cputime_eq(p->it_virt_expires,
+				if (cputime_eq(p->cputime_expires.virt_exp,
 					       cputime_zero) ||
-				    cputime_gt(p->it_virt_expires,
+				    cputime_gt(p->cputime_expires.virt_exp,
 					       nt->expires.cpu))
-					p->it_virt_expires = nt->expires.cpu;
+					p->cputime_expires.virt_exp =
+						nt->expires.cpu;
 				break;
 			case CPUCLOCK_SCHED:
-				if (p->it_sched_expires == 0 ||
-				    p->it_sched_expires > nt->expires.sched)
-					p->it_sched_expires = nt->expires.sched;
+				if (p->cputime_expires.sched_exp == 0 ||
+				    p->cputime_expires.sched_exp >
+							nt->expires.sched)
+					p->cputime_expires.sched_exp =
+						nt->expires.sched;
 				break;
 			}
 		} else {
 			/*
-			 * For a process timer, we must balance
-			 * all the live threads' expirations.
+			 * For a process timer, set the cached expiration time.
 			 */
 			switch (CPUCLOCK_WHICH(timer->it_clock)) {
 			default:
@@ -641,7 +653,9 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
 				    cputime_lt(p->signal->it_virt_expires,
 					       timer->it.cpu.expires.cpu))
 					break;
-				goto rebalance;
+				p->signal->cputime_expires.virt_exp =
+					timer->it.cpu.expires.cpu;
+				break;
 			case CPUCLOCK_PROF:
 				if (!cputime_eq(p->signal->it_prof_expires,
 						cputime_zero) &&
@@ -652,13 +666,12 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
 				if (i != RLIM_INFINITY &&
 				    i <= cputime_to_secs(timer->it.cpu.expires.cpu))
 					break;
-				goto rebalance;
+				p->signal->cputime_expires.prof_exp =
+					timer->it.cpu.expires.cpu;
+				break;
 			case CPUCLOCK_SCHED:
-			rebalance:
-				process_timer_rebalance(
-					timer->it.cpu.task,
-					CPUCLOCK_WHICH(timer->it_clock),
-					timer->it.cpu.expires, now);
+				p->signal->cputime_expires.sched_exp =
+					timer->it.cpu.expires.sched;
 				break;
 			}
 		}
@@ -969,13 +982,13 @@ static void check_thread_timers(struct task_struct *tsk,
 	struct signal_struct *const sig = tsk->signal;
 
 	maxfire = 20;
-	tsk->it_prof_expires = cputime_zero;
+	tsk->cputime_expires.prof_exp = cputime_zero;
 	while (!list_empty(timers)) {
 		struct cpu_timer_list *t = list_first_entry(timers,
 						      struct cpu_timer_list,
 						      entry);
 		if (!--maxfire || cputime_lt(prof_ticks(tsk), t->expires.cpu)) {
-			tsk->it_prof_expires = t->expires.cpu;
+			tsk->cputime_expires.prof_exp = t->expires.cpu;
 			break;
 		}
 		t->firing = 1;
@@ -984,13 +997,13 @@ static void check_thread_timers(struct task_struct *tsk,
 
 	++timers;
 	maxfire = 20;
-	tsk->it_virt_expires = cputime_zero;
+	tsk->cputime_expires.virt_exp = cputime_zero;
 	while (!list_empty(timers)) {
 		struct cpu_timer_list *t = list_first_entry(timers,
 						      struct cpu_timer_list,
 						      entry);
 		if (!--maxfire || cputime_lt(virt_ticks(tsk), t->expires.cpu)) {
-			tsk->it_virt_expires = t->expires.cpu;
+			tsk->cputime_expires.virt_exp = t->expires.cpu;
 			break;
 		}
 		t->firing = 1;
@@ -999,13 +1012,13 @@ static void check_thread_timers(struct task_struct *tsk,
 
 	++timers;
 	maxfire = 20;
-	tsk->it_sched_expires = 0;
+	tsk->cputime_expires.sched_exp = 0;
 	while (!list_empty(timers)) {
 		struct cpu_timer_list *t = list_first_entry(timers,
 						      struct cpu_timer_list,
 						      entry);
 		if (!--maxfire || tsk->se.sum_exec_runtime < t->expires.sched) {
-			tsk->it_sched_expires = t->expires.sched;
+			tsk->cputime_expires.sched_exp = t->expires.sched;
 			break;
 		}
 		t->firing = 1;
@@ -1055,10 +1068,10 @@ static void check_process_timers(struct task_struct *tsk,
 {
 	int maxfire;
 	struct signal_struct *const sig = tsk->signal;
-	cputime_t utime, stime, ptime, virt_expires, prof_expires;
+	cputime_t utime, ptime, virt_expires, prof_expires;
 	unsigned long long sum_sched_runtime, sched_expires;
-	struct task_struct *t;
 	struct list_head *timers = sig->cpu_timers;
+	struct task_cputime cputime;
 
 	/*
 	 * Don't sample the current process CPU clocks if there are no timers.
@@ -1074,18 +1087,10 @@ static void check_process_timers(struct task_struct *tsk,
 	/*
 	 * Collect the current process totals.
 	 */
-	utime = sig->utime;
-	stime = sig->stime;
-	sum_sched_runtime = sig->sum_sched_runtime;
-	t = tsk;
-	do {
-		utime = cputime_add(utime, t->utime);
-		stime = cputime_add(stime, t->stime);
-		sum_sched_runtime += t->se.sum_exec_runtime;
-		t = next_thread(t);
-	} while (t != tsk);
-	ptime = cputime_add(utime, stime);
-
+	thread_group_cputime(tsk, &cputime);
+	utime = cputime.utime;
+	ptime = cputime_add(utime, cputime.stime);
+	sum_sched_runtime = cputime.sum_exec_runtime;
 	maxfire = 20;
 	prof_expires = cputime_zero;
 	while (!list_empty(timers)) {
@@ -1193,60 +1198,18 @@ static void check_process_timers(struct task_struct *tsk,
 		}
 	}
 
-	if (!cputime_eq(prof_expires, cputime_zero) ||
-	    !cputime_eq(virt_expires, cputime_zero) ||
-	    sched_expires != 0) {
-		/*
-		 * Rebalance the threads' expiry times for the remaining
-		 * process CPU timers.
-		 */
-
-		cputime_t prof_left, virt_left, ticks;
-		unsigned long long sched_left, sched;
-		const unsigned int nthreads = atomic_read(&sig->live);
-
-		if (!nthreads)
-			return;
-
-		prof_left = cputime_sub(prof_expires, utime);
-		prof_left = cputime_sub(prof_left, stime);
-		prof_left = cputime_div_non_zero(prof_left, nthreads);
-		virt_left = cputime_sub(virt_expires, utime);
-		virt_left = cputime_div_non_zero(virt_left, nthreads);
-		if (sched_expires) {
-			sched_left = sched_expires - sum_sched_runtime;
-			do_div(sched_left, nthreads);
-			sched_left = max_t(unsigned long long, sched_left, 1);
-		} else {
-			sched_left = 0;
-		}
-		t = tsk;
-		do {
-			if (unlikely(t->flags & PF_EXITING))
-				continue;
-
-			ticks = cputime_add(cputime_add(t->utime, t->stime),
-					    prof_left);
-			if (!cputime_eq(prof_expires, cputime_zero) &&
-			    (cputime_eq(t->it_prof_expires, cputime_zero) ||
-			     cputime_gt(t->it_prof_expires, ticks))) {
-				t->it_prof_expires = ticks;
-			}
-
-			ticks = cputime_add(t->utime, virt_left);
-			if (!cputime_eq(virt_expires, cputime_zero) &&
-			    (cputime_eq(t->it_virt_expires, cputime_zero) ||
-			     cputime_gt(t->it_virt_expires, ticks))) {
-				t->it_virt_expires = ticks;
-			}
-
-			sched = t->se.sum_exec_runtime + sched_left;
-			if (sched_expires && (t->it_sched_expires == 0 ||
-					      t->it_sched_expires > sched)) {
-				t->it_sched_expires = sched;
-			}
-		} while ((t = next_thread(t)) != tsk);
-	}
+	if (!cputime_eq(prof_expires, cputime_zero) &&
+	    (cputime_eq(sig->cputime_expires.prof_exp, cputime_zero) ||
+	     cputime_gt(sig->cputime_expires.prof_exp, prof_expires)))
+		sig->cputime_expires.prof_exp = prof_expires;
+	if (!cputime_eq(virt_expires, cputime_zero) &&
+	    (cputime_eq(sig->cputime_expires.virt_exp, cputime_zero) ||
+	     cputime_gt(sig->cputime_expires.virt_exp, virt_expires)))
+		sig->cputime_expires.virt_exp = virt_expires;
+	if (sched_expires != 0 &&
+	    (sig->cputime_expires.sched_exp == 0 ||
+	     sig->cputime_expires.sched_exp > sched_expires))
+		sig->cputime_expires.sched_exp = sched_expires;
 }
 
 /*
@@ -1314,6 +1277,78 @@ out:
 	++timer->it_requeue_pending;
 }
 
+/**
+ * task_cputime_zero - Check a task_cputime struct for all zero fields.
+ *
+ * @cputime:	The struct to compare.
+ *
+ * Checks @cputime to see if all fields are zero.  Returns true if all fields
+ * are zero, false if any field is nonzero.
+ */
+static inline int task_cputime_zero(const struct task_cputime *cputime)
+{
+	if (cputime_eq(cputime->utime, cputime_zero) &&
+	    cputime_eq(cputime->stime, cputime_zero) &&
+	    cputime->sum_exec_runtime == 0)
+		return 1;
+	return 0;
+}
+
+/**
+ * task_cputime_expired - Compare two task_cputime entities.
+ *
+ * @sample:	The task_cputime structure to be checked for expiration.
+ * @expires:	Expiration times, against which @sample will be checked.
+ *
+ * Checks @sample against @expires to see if any field of @sample has expired.
+ * Returns true if any field of the former is greater than the corresponding
+ * field of the latter if the latter field is set.  Otherwise returns false.
+ */
+static inline int task_cputime_expired(const struct task_cputime *sample,
+					const struct task_cputime *expires)
+{
+	if (!cputime_eq(expires->utime, cputime_zero) &&
+	    cputime_ge(sample->utime, expires->utime))
+		return 1;
+	if (!cputime_eq(expires->stime, cputime_zero) &&
+	    cputime_ge(cputime_add(sample->utime, sample->stime),
+		       expires->stime))
+		return 1;
+	if (expires->sum_exec_runtime != 0 &&
+	    sample->sum_exec_runtime >= expires->sum_exec_runtime)
+		return 1;
+	return 0;
+}
+
+/**
+ * fastpath_timer_check - POSIX CPU timers fast path.
+ *
+ * @tsk:	The task (thread) being checked.
+ * @sig:	The signal pointer for that task.
+ *
+ * If there are no timers set return false.  Otherwise snapshot the task and
+ * thread group timers, then compare them with the corresponding expiration
+ # times.  Returns true if a timer has expired, else returns false.
+ */
+static inline int fastpath_timer_check(struct task_struct *tsk,
+					struct signal_struct *sig)
+{
+	struct task_cputime task_sample = {
+		.utime = tsk->utime,
+		.stime = tsk->stime,
+		.sum_exec_runtime = tsk->se.sum_exec_runtime
+	};
+	struct task_cputime group_sample;
+
+	if (task_cputime_zero(&tsk->cputime_expires) &&
+	    task_cputime_zero(&sig->cputime_expires))
+		return 0;
+	if (task_cputime_expired(&task_sample, &tsk->cputime_expires))
+		return 1;
+	thread_group_cputime(tsk, &group_sample);
+	return task_cputime_expired(&group_sample, &sig->cputime_expires);
+}
+
 /*
  * This is called from the timer interrupt handler.  The irq handler has
  * already updated our counts.  We need to check if any timers fire now.
@@ -1323,30 +1358,29 @@ void run_posix_cpu_timers(struct task_struct *tsk)
 {
 	LIST_HEAD(firing);
 	struct k_itimer *timer, *next;
+	struct signal_struct *sig;
+	struct sighand_struct *sighand;
+	unsigned long flags;
 
 	BUG_ON(!irqs_disabled());
 
-#define UNEXPIRED(clock) \
-		(cputime_eq(tsk->it_##clock##_expires, cputime_zero) || \
-		 cputime_lt(clock##_ticks(tsk), tsk->it_##clock##_expires))
-
-	if (UNEXPIRED(prof) && UNEXPIRED(virt) &&
-	    (tsk->it_sched_expires == 0 ||
-	     tsk->se.sum_exec_runtime < tsk->it_sched_expires))
-		return;
-
-#undef	UNEXPIRED
-
+	/* Pick up tsk->signal and make sure it's valid. */
+	sig = tsk->signal;
 	/*
-	 * Double-check with locks held.
+	 * The fast path checks that there are no expired thread or thread
+	 * group timers.  If that's so, just return.  Also check that
+	 * tsk->signal is non-NULL; this probably can't happen but cover the
+	 * possibility anyway.
 	 */
-	read_lock(&tasklist_lock);
-	if (likely(tsk->signal != NULL)) {
-		spin_lock(&tsk->sighand->siglock);
-
+	if (unlikely(!sig) || !fastpath_timer_check(tsk, sig)) {
+		return;
+	}
+	sighand = lock_task_sighand(tsk, &flags);
+	if (likely(sighand)) {
 		/*
-		 * Here we take off tsk->cpu_timers[N] and tsk->signal->cpu_timers[N]
-		 * all the timers that are firing, and put them on the firing list.
+		 * Here we take off tsk->signal->cpu_timers[N] and
+		 * tsk->cpu_timers[N] all the timers that are firing, and
+		 * put them on the firing list.
 		 */
 		check_thread_timers(tsk, &firing);
 		check_process_timers(tsk, &firing);
@@ -1359,9 +1393,8 @@ void run_posix_cpu_timers(struct task_struct *tsk)
 		 * that gets the timer lock before we do will give it up and
 		 * spin until we've taken care of that timer below.
 		 */
-		spin_unlock(&tsk->sighand->siglock);
 	}
-	read_unlock(&tasklist_lock);
+	unlock_task_sighand(tsk, &flags);
 
 	/*
 	 * Now that all the timers on our list have the firing flag,
@@ -1389,10 +1422,9 @@ void run_posix_cpu_timers(struct task_struct *tsk)
 
 /*
  * Set one of the process-wide special case CPU timers.
- * The tasklist_lock and tsk->sighand->siglock must be held by the caller.
- * The oldval argument is null for the RLIMIT_CPU timer, where *newval is
- * absolute; non-null for ITIMER_*, where *newval is relative and we update
- * it to be absolute, *oldval is absolute and we update it to be relative.
+ * The tsk->sighand->siglock must be held by the caller.
+ * The *newval argument is relative and we update it to be absolute, *oldval
+ * is absolute and we update it to be relative.
  */
 void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
 			   cputime_t *newval, cputime_t *oldval)
@@ -1435,13 +1467,14 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
 	    cputime_ge(list_first_entry(head,
 				  struct cpu_timer_list, entry)->expires.cpu,
 		       *newval)) {
-		/*
-		 * Rejigger each thread's expiry time so that one will
-		 * notice before we hit the process-cumulative expiry time.
-		 */
-		union cpu_time_count expires = { .sched = 0 };
-		expires.cpu = *newval;
-		process_timer_rebalance(tsk, clock_idx, expires, now);
+		switch (clock_idx) {
+		case CPUCLOCK_PROF:
+			tsk->signal->cputime_expires.prof_exp = *newval;
+			break;
+		case CPUCLOCK_VIRT:
+			tsk->signal->cputime_expires.virt_exp = *newval;
+			break;
+		}
 	}
 }
 
diff --git a/kernel/sched.c b/kernel/sched.c
index cc1f81b50b82..c51b5d276665 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4036,6 +4036,25 @@ DEFINE_PER_CPU(struct kernel_stat, kstat);
 
 EXPORT_PER_CPU_SYMBOL(kstat);
 
+/*
+ * Return any ns on the sched_clock that have not yet been banked in
+ * @p in case that task is currently running.
+ *
+ * Called with task_rq_lock() held on @rq.
+ */
+static unsigned long long task_delta_exec(struct task_struct *p, struct rq *rq)
+{
+	if (task_current(rq, p)) {
+		u64 delta_exec;
+
+		update_rq_clock(rq);
+		delta_exec = rq->clock - p->se.exec_start;
+		if ((s64)delta_exec > 0)
+			return delta_exec;
+	}
+	return 0;
+}
+
 /*
  * Return p->sum_exec_runtime plus any more ns on the sched_clock
  * that have not yet been banked in case the task is currently running.
@@ -4043,17 +4062,31 @@ EXPORT_PER_CPU_SYMBOL(kstat);
 unsigned long long task_sched_runtime(struct task_struct *p)
 {
 	unsigned long flags;
-	u64 ns, delta_exec;
+	u64 ns;
 	struct rq *rq;
 
 	rq = task_rq_lock(p, &flags);
-	ns = p->se.sum_exec_runtime;
-	if (task_current(rq, p)) {
-		update_rq_clock(rq);
-		delta_exec = rq->clock - p->se.exec_start;
-		if ((s64)delta_exec > 0)
-			ns += delta_exec;
-	}
+	ns = p->se.sum_exec_runtime + task_delta_exec(p, rq);
+	task_rq_unlock(rq, &flags);
+
+	return ns;
+}
+
+/*
+ * Return sum_exec_runtime for the thread group plus any more ns on the
+ * sched_clock that have not yet been banked in case the task is currently
+ * running.
+ */
+unsigned long long thread_group_sched_runtime(struct task_struct *p)
+{
+	unsigned long flags;
+	u64 ns;
+	struct rq *rq;
+	struct task_cputime totals;
+
+	rq = task_rq_lock(p, &flags);
+	thread_group_cputime(p, &totals);
+	ns = totals.sum_exec_runtime + task_delta_exec(p, rq);
 	task_rq_unlock(rq, &flags);
 
 	return ns;
@@ -4070,6 +4103,7 @@ void account_user_time(struct task_struct *p, cputime_t cputime)
 	cputime64_t tmp;
 
 	p->utime = cputime_add(p->utime, cputime);
+	account_group_user_time(p, cputime);
 
 	/* Add user time to cpustat. */
 	tmp = cputime_to_cputime64(cputime);
@@ -4094,6 +4128,7 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime)
 	tmp = cputime_to_cputime64(cputime);
 
 	p->utime = cputime_add(p->utime, cputime);
+	account_group_user_time(p, cputime);
 	p->gtime = cputime_add(p->gtime, cputime);
 
 	cpustat->user = cputime64_add(cpustat->user, tmp);
@@ -4129,6 +4164,7 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
 	}
 
 	p->stime = cputime_add(p->stime, cputime);
+	account_group_system_time(p, cputime);
 
 	/* Add system time to cpustat. */
 	tmp = cputime_to_cputime64(cputime);
@@ -4170,6 +4206,7 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
 
 	if (p == rq->idle) {
 		p->stime = cputime_add(p->stime, steal);
+		account_group_system_time(p, steal);
 		if (atomic_read(&rq->nr_iowait) > 0)
 			cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
 		else
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index fb8994c6d4bb..99aa31acc544 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -507,6 +507,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
 		struct task_struct *curtask = task_of(curr);
 
 		cpuacct_charge(curtask, delta_exec);
+		account_group_exec_runtime(curtask, delta_exec);
 	}
 }
 
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 552310798dad..8375e69af36a 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -483,6 +483,8 @@ static void update_curr_rt(struct rq *rq)
 	schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec));
 
 	curr->se.sum_exec_runtime += delta_exec;
+	account_group_exec_runtime(curr, delta_exec);
+
 	curr->se.exec_start = rq->clock;
 	cpuacct_charge(curr, delta_exec);
 
@@ -1412,7 +1414,7 @@ static void watchdog(struct rq *rq, struct task_struct *p)
 		p->rt.timeout++;
 		next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
 		if (p->rt.timeout > next)
-			p->it_sched_expires = p->se.sum_exec_runtime;
+			p->cputime_expires.sched_exp = p->se.sum_exec_runtime;
 	}
 }
 
diff --git a/kernel/signal.c b/kernel/signal.c
index e661b01d340f..6eea5826d618 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1338,6 +1338,7 @@ int do_notify_parent(struct task_struct *tsk, int sig)
 	struct siginfo info;
 	unsigned long flags;
 	struct sighand_struct *psig;
+	struct task_cputime cputime;
 	int ret = sig;
 
 	BUG_ON(sig == -1);
@@ -1368,10 +1369,9 @@ int do_notify_parent(struct task_struct *tsk, int sig)
 
 	info.si_uid = tsk->uid;
 
-	info.si_utime = cputime_to_clock_t(cputime_add(tsk->utime,
-						       tsk->signal->utime));
-	info.si_stime = cputime_to_clock_t(cputime_add(tsk->stime,
-						       tsk->signal->stime));
+	thread_group_cputime(tsk, &cputime);
+	info.si_utime = cputime_to_jiffies(cputime.utime);
+	info.si_stime = cputime_to_jiffies(cputime.stime);
 
 	info.si_status = tsk->exit_code & 0x7f;
 	if (tsk->exit_code & 0x80)
diff --git a/kernel/sys.c b/kernel/sys.c
index 038a7bc0901d..d046a7a055c2 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -853,38 +853,28 @@ asmlinkage long sys_setfsgid(gid_t gid)
 	return old_fsgid;
 }
 
+void do_sys_times(struct tms *tms)
+{
+	struct task_cputime cputime;
+	cputime_t cutime, cstime;
+
+	spin_lock_irq(&current->sighand->siglock);
+	thread_group_cputime(current, &cputime);
+	cutime = current->signal->cutime;
+	cstime = current->signal->cstime;
+	spin_unlock_irq(&current->sighand->siglock);
+	tms->tms_utime = cputime_to_clock_t(cputime.utime);
+	tms->tms_stime = cputime_to_clock_t(cputime.stime);
+	tms->tms_cutime = cputime_to_clock_t(cutime);
+	tms->tms_cstime = cputime_to_clock_t(cstime);
+}
+
 asmlinkage long sys_times(struct tms __user * tbuf)
 {
-	/*
-	 *	In the SMP world we might just be unlucky and have one of
-	 *	the times increment as we use it. Since the value is an
-	 *	atomically safe type this is just fine. Conceptually its
-	 *	as if the syscall took an instant longer to occur.
-	 */
 	if (tbuf) {
 		struct tms tmp;
-		struct task_struct *tsk = current;
-		struct task_struct *t;
-		cputime_t utime, stime, cutime, cstime;
-
-		spin_lock_irq(&tsk->sighand->siglock);
-		utime = tsk->signal->utime;
-		stime = tsk->signal->stime;
-		t = tsk;
-		do {
-			utime = cputime_add(utime, t->utime);
-			stime = cputime_add(stime, t->stime);
-			t = next_thread(t);
-		} while (t != tsk);
-
-		cutime = tsk->signal->cutime;
-		cstime = tsk->signal->cstime;
-		spin_unlock_irq(&tsk->sighand->siglock);
-
-		tmp.tms_utime = cputime_to_clock_t(utime);
-		tmp.tms_stime = cputime_to_clock_t(stime);
-		tmp.tms_cutime = cputime_to_clock_t(cutime);
-		tmp.tms_cstime = cputime_to_clock_t(cstime);
+
+		do_sys_times(&tmp);
 		if (copy_to_user(tbuf, &tmp, sizeof(struct tms)))
 			return -EFAULT;
 	}
@@ -1445,7 +1435,6 @@ asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *r
 asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
 {
 	struct rlimit new_rlim, *old_rlim;
-	unsigned long it_prof_secs;
 	int retval;
 
 	if (resource >= RLIM_NLIMITS)
@@ -1491,18 +1480,7 @@ asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
 	if (new_rlim.rlim_cur == RLIM_INFINITY)
 		goto out;
 
-	it_prof_secs = cputime_to_secs(current->signal->it_prof_expires);
-	if (it_prof_secs == 0 || new_rlim.rlim_cur <= it_prof_secs) {
-		unsigned long rlim_cur = new_rlim.rlim_cur;
-		cputime_t cputime;
-
-		cputime = secs_to_cputime(rlim_cur);
-		read_lock(&tasklist_lock);
-		spin_lock_irq(&current->sighand->siglock);
-		set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
-		spin_unlock_irq(&current->sighand->siglock);
-		read_unlock(&tasklist_lock);
-	}
+	update_rlimit_cpu(new_rlim.rlim_cur);
 out:
 	return 0;
 }
@@ -1540,11 +1518,8 @@ out:
  *
  */
 
-static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r,
-				     cputime_t *utimep, cputime_t *stimep)
+static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r)
 {
-	*utimep = cputime_add(*utimep, t->utime);
-	*stimep = cputime_add(*stimep, t->stime);
 	r->ru_nvcsw += t->nvcsw;
 	r->ru_nivcsw += t->nivcsw;
 	r->ru_minflt += t->min_flt;
@@ -1558,12 +1533,13 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
 	struct task_struct *t;
 	unsigned long flags;
 	cputime_t utime, stime;
+	struct task_cputime cputime;
 
 	memset((char *) r, 0, sizeof *r);
 	utime = stime = cputime_zero;
 
 	if (who == RUSAGE_THREAD) {
-		accumulate_thread_rusage(p, r, &utime, &stime);
+		accumulate_thread_rusage(p, r);
 		goto out;
 	}
 
@@ -1586,8 +1562,9 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
 				break;
 
 		case RUSAGE_SELF:
-			utime = cputime_add(utime, p->signal->utime);
-			stime = cputime_add(stime, p->signal->stime);
+			thread_group_cputime(p, &cputime);
+			utime = cputime_add(utime, cputime.utime);
+			stime = cputime_add(stime, cputime.stime);
 			r->ru_nvcsw += p->signal->nvcsw;
 			r->ru_nivcsw += p->signal->nivcsw;
 			r->ru_minflt += p->signal->min_flt;
@@ -1596,7 +1573,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
 			r->ru_oublock += p->signal->oublock;
 			t = p;
 			do {
-				accumulate_thread_rusage(t, r, &utime, &stime);
+				accumulate_thread_rusage(t, r);
 				t = next_thread(t);
 			} while (t != p);
 			break;
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 03fc6a81ae32..69649783c266 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -75,6 +75,7 @@
 #include <linux/string.h>
 #include <linux/selinux.h>
 #include <linux/mutex.h>
+#include <linux/posix-timers.h>
 
 #include "avc.h"
 #include "objsec.h"
@@ -2321,13 +2322,7 @@ static void selinux_bprm_post_apply_creds(struct linux_binprm *bprm)
 			initrlim = init_task.signal->rlim+i;
 			rlim->rlim_cur = min(rlim->rlim_max, initrlim->rlim_cur);
 		}
-		if (current->signal->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
-			/*
-			 * This will cause RLIMIT_CPU calculations
-			 * to be refigured.
-			 */
-			current->it_prof_expires = jiffies_to_cputime(1);
-		}
+		update_rlimit_cpu(rlim->rlim_cur);
 	}
 
 	/* Wake up the parent if it is waiting so that it can
-- 
cgit v1.2.3


From de45e806a84909648623119dfe6fc1d31e71ceba Mon Sep 17 00:00:00 2001
From: "Serge E. Hallyn" <serue@us.ibm.com>
Date: Fri, 26 Sep 2008 22:27:47 -0400
Subject: file capabilities: uninline cap_safe_nice

This reduces the kernel size by 289 bytes.

Signed-off-by: Serge E. Hallyn <serue@us.ibm.com>
Acked-by: Andrew G. Morgan <morgan@kernel.org>
Signed-off-by: James Morris <jmorris@namei.org>
---
 security/commoncap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'security')

diff --git a/security/commoncap.c b/security/commoncap.c
index e4c4b3fc0c04..399bfdb9e2da 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -541,7 +541,7 @@ int cap_task_post_setuid (uid_t old_ruid, uid_t old_euid, uid_t old_suid,
  * yet with increased caps.
  * So we check for increased caps on the target process.
  */
-static inline int cap_safe_nice(struct task_struct *p)
+static int cap_safe_nice(struct task_struct *p)
 {
 	if (!cap_issubset(p->cap_permitted, current->cap_permitted) &&
 	    !capable(CAP_SYS_NICE))
-- 
cgit v1.2.3


From ea6b184f7d521a503ecab71feca6e4057562252b Mon Sep 17 00:00:00 2001
From: Stephen Smalley <sds@tycho.nsa.gov>
Date: Mon, 22 Sep 2008 15:41:19 -0400
Subject: selinux: use default proc sid on symlinks

As we are not concerned with fine-grained control over reading of
symlinks in proc, always use the default proc SID for all proc symlinks.
This should help avoid permission issues upon changes to the proc tree
as in the /proc/net -> /proc/self/net example.
This does not alter labeling of symlinks within /proc/pid directories.
ls -Zd /proc/net output before and after the patch should show the difference.

Signed-off-by:  Stephen D. Smalley <sds@tycho.nsa.gov>
Signed-off-by: James Morris <jmorris@namei.org>
---
 security/selinux/hooks.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'security')

diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 89f446d86054..4a7374c12d9c 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1291,7 +1291,7 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent
 		/* Default to the fs superblock SID. */
 		isec->sid = sbsec->sid;
 
-		if (sbsec->proc) {
+		if (sbsec->proc && !S_ISLNK(inode->i_mode)) {
 			struct proc_inode *proci = PROC_I(inode);
 			if (proci->pde) {
 				isec->sclass = inode_mode_to_security_class(inode->i_mode);
-- 
cgit v1.2.3


From 81990fbdd18b9cfdc93dc221ff3250f81468aed8 Mon Sep 17 00:00:00 2001
From: Paul Moore <paul.moore@hp.com>
Date: Fri, 3 Oct 2008 10:51:15 -0400
Subject: selinux: Fix an uninitialized variable BUG/panic in
 selinux_secattr_to_sid()

At some point during the 2.6.27 development cycle two new fields were added
to the SELinux context structure, a string pointer and a length field.  The
code in selinux_secattr_to_sid() was not modified and as a result these two
fields were left uninitialized which could result in erratic behavior,
including kernel panics, when NetLabel is used.  This patch fixes the
problem by fully initializing the context in selinux_secattr_to_sid() before
use and reducing the level of direct context manipulation done to help
prevent future problems.

Please apply this to the 2.6.27-rcX release stream.

Signed-off-by: Paul Moore <paul.moore@hp.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 security/selinux/ss/services.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

(limited to 'security')

diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index 876b815c1ba4..ab0cc0c7b944 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -2907,6 +2907,7 @@ int security_netlbl_secattr_to_sid(struct netlbl_lsm_secattr *secattr,
 		if (ctx == NULL)
 			goto netlbl_secattr_to_sid_return;
 
+		context_init(&ctx_new);
 		ctx_new.user = ctx->user;
 		ctx_new.role = ctx->role;
 		ctx_new.type = ctx->type;
@@ -2915,13 +2916,9 @@ int security_netlbl_secattr_to_sid(struct netlbl_lsm_secattr *secattr,
 			if (ebitmap_netlbl_import(&ctx_new.range.level[0].cat,
 						  secattr->attr.mls.cat) != 0)
 				goto netlbl_secattr_to_sid_return;
-			ctx_new.range.level[1].cat.highbit =
-				ctx_new.range.level[0].cat.highbit;
-			ctx_new.range.level[1].cat.node =
-				ctx_new.range.level[0].cat.node;
-		} else {
-			ebitmap_init(&ctx_new.range.level[0].cat);
-			ebitmap_init(&ctx_new.range.level[1].cat);
+			memcpy(&ctx_new.range.level[1].cat,
+			       &ctx_new.range.level[0].cat,
+			       sizeof(ctx_new.range.level[0].cat));
 		}
 		if (mls_context_isvalid(&policydb, &ctx_new) != 1)
 			goto netlbl_secattr_to_sid_return_cleanup;
-- 
cgit v1.2.3


From 3040a6d5a2655c7967bd42b5fb4903d48daa747f Mon Sep 17 00:00:00 2001
From: Paul Moore <paul.moore@hp.com>
Date: Fri, 3 Oct 2008 10:51:15 -0400
Subject: selinux: Fix an uninitialized variable BUG/panic in
 selinux_secattr_to_sid()

At some point during the 2.6.27 development cycle two new fields were added
to the SELinux context structure, a string pointer and a length field.  The
code in selinux_secattr_to_sid() was not modified and as a result these two
fields were left uninitialized which could result in erratic behavior,
including kernel panics, when NetLabel is used.  This patch fixes the
problem by fully initializing the context in selinux_secattr_to_sid() before
use and reducing the level of direct context manipulation done to help
prevent future problems.

Please apply this to the 2.6.27-rcX release stream.

Signed-off-by: Paul Moore <paul.moore@hp.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 security/selinux/ss/services.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

(limited to 'security')

diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index d11a8154500f..8551952ef329 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -2737,6 +2737,7 @@ int security_netlbl_secattr_to_sid(struct netlbl_lsm_secattr *secattr,
 		if (ctx == NULL)
 			goto netlbl_secattr_to_sid_return;
 
+		context_init(&ctx_new);
 		ctx_new.user = ctx->user;
 		ctx_new.role = ctx->role;
 		ctx_new.type = ctx->type;
@@ -2745,13 +2746,9 @@ int security_netlbl_secattr_to_sid(struct netlbl_lsm_secattr *secattr,
 			if (ebitmap_netlbl_import(&ctx_new.range.level[0].cat,
 						  secattr->attr.mls.cat) != 0)
 				goto netlbl_secattr_to_sid_return;
-			ctx_new.range.level[1].cat.highbit =
-				ctx_new.range.level[0].cat.highbit;
-			ctx_new.range.level[1].cat.node =
-				ctx_new.range.level[0].cat.node;
-		} else {
-			ebitmap_init(&ctx_new.range.level[0].cat);
-			ebitmap_init(&ctx_new.range.level[1].cat);
+			memcpy(&ctx_new.range.level[1].cat,
+			       &ctx_new.range.level[0].cat,
+			       sizeof(ctx_new.range.level[0].cat));
 		}
 		if (mls_context_isvalid(&policydb, &ctx_new) != 1)
 			goto netlbl_secattr_to_sid_return_cleanup;
-- 
cgit v1.2.3


From accc609322ef5ed44cba6d2d70c741afc76385fb Mon Sep 17 00:00:00 2001
From: Paul Moore <paul.moore@hp.com>
Date: Fri, 10 Oct 2008 10:16:29 -0400
Subject: selinux: Cleanup the NetLabel glue code

We were doing a lot of extra work in selinux_netlbl_sock_graft() what wasn't
necessary so this patch removes that code.  It also removes the redundant
second argument to selinux_netlbl_sock_setsid() which allows us to simplify a
few other functions.

Signed-off-by: Paul Moore <paul.moore@hp.com>
Acked-by: James Morris <jmorris@namei.org>
---
 security/selinux/netlabel.c | 38 ++++++++++----------------------------
 1 file changed, 10 insertions(+), 28 deletions(-)

(limited to 'security')

diff --git a/security/selinux/netlabel.c b/security/selinux/netlabel.c
index 89b418392f11..b9ce5fcf3432 100644
--- a/security/selinux/netlabel.c
+++ b/security/selinux/netlabel.c
@@ -66,22 +66,24 @@ static int selinux_netlbl_sidlookup_cached(struct sk_buff *skb,
 /**
  * selinux_netlbl_sock_setsid - Label a socket using the NetLabel mechanism
  * @sk: the socket to label
- * @sid: the SID to use
  *
  * Description:
- * Attempt to label a socket using the NetLabel mechanism using the given
- * SID.  Returns zero values on success, negative values on failure.
+ * Attempt to label a socket using the NetLabel mechanism.  Returns zero values
+ * on success, negative values on failure.
  *
  */
-static int selinux_netlbl_sock_setsid(struct sock *sk, u32 sid)
+static int selinux_netlbl_sock_setsid(struct sock *sk)
 {
 	int rc;
 	struct sk_security_struct *sksec = sk->sk_security;
 	struct netlbl_lsm_secattr secattr;
 
+	if (sksec->nlbl_state != NLBL_REQUIRE)
+		return 0;
+
 	netlbl_secattr_init(&secattr);
 
-	rc = security_netlbl_sid_to_secattr(sid, &secattr);
+	rc = security_netlbl_sid_to_secattr(sksec->sid, &secattr);
 	if (rc != 0)
 		goto sock_setsid_return;
 	rc = netlbl_sock_setattr(sk, &secattr);
@@ -174,24 +176,10 @@ int selinux_netlbl_skbuff_getsid(struct sk_buff *skb,
  */
 void selinux_netlbl_sock_graft(struct sock *sk, struct socket *sock)
 {
-	struct sk_security_struct *sksec = sk->sk_security;
-	struct netlbl_lsm_secattr secattr;
-	u32 nlbl_peer_sid;
-
-	if (sksec->nlbl_state != NLBL_REQUIRE)
-		return;
-
-	netlbl_secattr_init(&secattr);
-	if (netlbl_sock_getattr(sk, &secattr) == 0 &&
-	    secattr.flags != NETLBL_SECATTR_NONE &&
-	    security_netlbl_secattr_to_sid(&secattr, &nlbl_peer_sid) == 0)
-		sksec->peer_sid = nlbl_peer_sid;
-	netlbl_secattr_destroy(&secattr);
-
 	/* Try to set the NetLabel on the socket to save time later, if we fail
 	 * here we will pick up the pieces in later calls to
 	 * selinux_netlbl_inode_permission(). */
-	selinux_netlbl_sock_setsid(sk, sksec->sid);
+	selinux_netlbl_sock_setsid(sk);
 }
 
 /**
@@ -205,13 +193,7 @@ void selinux_netlbl_sock_graft(struct sock *sk, struct socket *sock)
  */
 int selinux_netlbl_socket_post_create(struct socket *sock)
 {
-	struct sock *sk = sock->sk;
-	struct sk_security_struct *sksec = sk->sk_security;
-
-	if (sksec->nlbl_state != NLBL_REQUIRE)
-		return 0;
-
-	return selinux_netlbl_sock_setsid(sk, sksec->sid);
+	return selinux_netlbl_sock_setsid(sock->sk);
 }
 
 /**
@@ -246,7 +228,7 @@ int selinux_netlbl_inode_permission(struct inode *inode, int mask)
 	local_bh_disable();
 	bh_lock_sock_nested(sk);
 	if (likely(sksec->nlbl_state == NLBL_REQUIRE))
-		rc = selinux_netlbl_sock_setsid(sk, sksec->sid);
+		rc = selinux_netlbl_sock_setsid(sk);
 	else
 		rc = 0;
 	bh_unlock_sock(sk);
-- 
cgit v1.2.3


From aa86290089a1e57b4bdbbb4720072233f66bd5b2 Mon Sep 17 00:00:00 2001
From: Paul Moore <paul.moore@hp.com>
Date: Fri, 10 Oct 2008 10:16:29 -0400
Subject: selinux: Correctly handle IPv4 packets on IPv6 sockets in all cases

We did the right thing in a few cases but there were several areas where we
determined a packet's address family based on the socket's address family which
is not the right thing to do since we can get IPv4 packets on IPv6 sockets.
This patch fixes these problems by either taking the address family directly
from the packet.

Signed-off-by: Paul Moore <paul.moore@hp.com>
Acked-by: James Morris <jmorris@namei.org>
---
 security/selinux/hooks.c | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

(limited to 'security')

diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 03fc6a81ae32..223f474bee86 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -4207,10 +4207,12 @@ static int selinux_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *
 	u32 peer_secid = SECSID_NULL;
 	u16 family;
 
-	if (sock)
+	if (skb && skb->protocol == htons(ETH_P_IP))
+		family = PF_INET;
+	else if (skb && skb->protocol == htons(ETH_P_IPV6))
+		family = PF_INET6;
+	else if (sock)
 		family = sock->sk->sk_family;
-	else if (skb && skb->sk)
-		family = skb->sk->sk_family;
 	else
 		goto out;
 
@@ -4277,10 +4279,15 @@ static int selinux_inet_conn_request(struct sock *sk, struct sk_buff *skb,
 {
 	struct sk_security_struct *sksec = sk->sk_security;
 	int err;
+	u16 family = sk->sk_family;
 	u32 newsid;
 	u32 peersid;
 
-	err = selinux_skb_peerlbl_sid(skb, sk->sk_family, &peersid);
+	/* handle mapped IPv4 packets arriving via IPv6 sockets */
+	if (family == PF_INET6 && skb->protocol == htons(ETH_P_IP))
+		family = PF_INET;
+
+	err = selinux_skb_peerlbl_sid(skb, family, &peersid);
 	if (err)
 		return err;
 	if (peersid == SECSID_NULL) {
@@ -4318,9 +4325,14 @@ static void selinux_inet_csk_clone(struct sock *newsk,
 static void selinux_inet_conn_established(struct sock *sk,
 				struct sk_buff *skb)
 {
+	u16 family = sk->sk_family;
 	struct sk_security_struct *sksec = sk->sk_security;
 
-	selinux_skb_peerlbl_sid(skb, sk->sk_family, &sksec->peer_sid);
+	/* handle mapped IPv4 packets arriving via IPv6 sockets */
+	if (family == PF_INET6 && skb->protocol == htons(ETH_P_IP))
+		family = PF_INET;
+
+	selinux_skb_peerlbl_sid(skb, family, &sksec->peer_sid);
 }
 
 static void selinux_req_classify_flow(const struct request_sock *req,
-- 
cgit v1.2.3


From d8395c876bb8a560c8a032887e191b95499a25d6 Mon Sep 17 00:00:00 2001
From: Paul Moore <paul.moore@hp.com>
Date: Fri, 10 Oct 2008 10:16:30 -0400
Subject: selinux: Better local/forward check in selinux_ip_postroute()

It turns out that checking to see if skb->sk is NULL is not a very good
indicator of a forwarded packet as some locally generated packets also have
skb->sk set to NULL.  Fix this by not only checking the skb->sk field but also
the IP[6]CB(skb)->flags field for the IP[6]SKB_FORWARDED flag.  While we are
at it, we are calling selinux_parse_skb() much earlier than we really should
resulting in potentially wasted cycles parsing packets for information we
might no use; so shuffle the code around a bit to fix this.

Signed-off-by: Paul Moore <paul.moore@hp.com>
Acked-by: James Morris <jmorris@namei.org>
---
 security/selinux/hooks.c | 126 ++++++++++++++++++++++++++++++-----------------
 1 file changed, 81 insertions(+), 45 deletions(-)

(limited to 'security')

diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 223f474bee86..b520667a24be 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -4070,20 +4070,28 @@ static int selinux_sock_rcv_skb_iptables_compat(struct sock *sk,
 }
 
 static int selinux_sock_rcv_skb_compat(struct sock *sk, struct sk_buff *skb,
-				       struct avc_audit_data *ad,
-				       u16 family, char *addrp)
+				       u16 family)
 {
 	int err;
 	struct sk_security_struct *sksec = sk->sk_security;
 	u32 peer_sid;
 	u32 sk_sid = sksec->sid;
+	struct avc_audit_data ad;
+	char *addrp;
+
+	AVC_AUDIT_DATA_INIT(&ad, NET);
+	ad.u.net.netif = skb->iif;
+	ad.u.net.family = family;
+	err = selinux_parse_skb(skb, &ad, &addrp, 1, NULL);
+	if (err)
+		return err;
 
 	if (selinux_compat_net)
-		err = selinux_sock_rcv_skb_iptables_compat(sk, skb, ad,
+		err = selinux_sock_rcv_skb_iptables_compat(sk, skb, &ad,
 							   family, addrp);
 	else
 		err = avc_has_perm(sk_sid, skb->secmark, SECCLASS_PACKET,
-				   PACKET__RECV, ad);
+				   PACKET__RECV, &ad);
 	if (err)
 		return err;
 
@@ -4092,12 +4100,12 @@ static int selinux_sock_rcv_skb_compat(struct sock *sk, struct sk_buff *skb,
 		if (err)
 			return err;
 		err = avc_has_perm(sk_sid, peer_sid,
-				   SECCLASS_PEER, PEER__RECV, ad);
+				   SECCLASS_PEER, PEER__RECV, &ad);
 	} else {
-		err = selinux_netlbl_sock_rcv_skb(sksec, skb, family, ad);
+		err = selinux_netlbl_sock_rcv_skb(sksec, skb, family, &ad);
 		if (err)
 			return err;
-		err = selinux_xfrm_sock_rcv_skb(sksec->sid, skb, ad);
+		err = selinux_xfrm_sock_rcv_skb(sksec->sid, skb, &ad);
 	}
 
 	return err;
@@ -4111,6 +4119,8 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	u32 sk_sid = sksec->sid;
 	struct avc_audit_data ad;
 	char *addrp;
+	u8 secmark_active;
+	u8 peerlbl_active;
 
 	if (family != PF_INET && family != PF_INET6)
 		return 0;
@@ -4119,6 +4129,18 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	if (family == PF_INET6 && skb->protocol == htons(ETH_P_IP))
 		family = PF_INET;
 
+	/* If any sort of compatibility mode is enabled then handoff processing
+	 * to the selinux_sock_rcv_skb_compat() function to deal with the
+	 * special handling.  We do this in an attempt to keep this function
+	 * as fast and as clean as possible. */
+	if (selinux_compat_net || !selinux_policycap_netpeer)
+		return selinux_sock_rcv_skb_compat(sk, skb, family);
+
+	secmark_active = selinux_secmark_enabled();
+	peerlbl_active = netlbl_enabled() || selinux_xfrm_enabled();
+	if (!secmark_active && !peerlbl_active)
+		return 0;
+
 	AVC_AUDIT_DATA_INIT(&ad, NET);
 	ad.u.net.netif = skb->iif;
 	ad.u.net.family = family;
@@ -4126,15 +4148,7 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	if (err)
 		return err;
 
-	/* If any sort of compatibility mode is enabled then handoff processing
-	 * to the selinux_sock_rcv_skb_compat() function to deal with the
-	 * special handling.  We do this in an attempt to keep this function
-	 * as fast and as clean as possible. */
-	if (selinux_compat_net || !selinux_policycap_netpeer)
-		return selinux_sock_rcv_skb_compat(sk, skb, &ad,
-						   family, addrp);
-
-	if (netlbl_enabled() || selinux_xfrm_enabled()) {
+	if (peerlbl_active) {
 		u32 peer_sid;
 
 		err = selinux_skb_peerlbl_sid(skb, family, &peer_sid);
@@ -4148,7 +4162,7 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
 				   PEER__RECV, &ad);
 	}
 
-	if (selinux_secmark_enabled()) {
+	if (secmark_active) {
 		err = avc_has_perm(sk_sid, skb->secmark, SECCLASS_PACKET,
 				   PACKET__RECV, &ad);
 		if (err)
@@ -4396,15 +4410,15 @@ static unsigned int selinux_ip_forward(struct sk_buff *skb, int ifindex,
 	if (!secmark_active && !peerlbl_active)
 		return NF_ACCEPT;
 
+	if (selinux_skb_peerlbl_sid(skb, family, &peer_sid) != 0)
+		return NF_DROP;
+
 	AVC_AUDIT_DATA_INIT(&ad, NET);
 	ad.u.net.netif = ifindex;
 	ad.u.net.family = family;
 	if (selinux_parse_skb(skb, &ad, &addrp, 1, NULL) != 0)
 		return NF_DROP;
 
-	if (selinux_skb_peerlbl_sid(skb, family, &peer_sid) != 0)
-		return NF_DROP;
-
 	if (peerlbl_active)
 		if (selinux_inet_sys_rcv_skb(ifindex, addrp, family,
 					     peer_sid, &ad) != 0)
@@ -4505,30 +4519,36 @@ static int selinux_ip_postroute_iptables_compat(struct sock *sk,
 
 static unsigned int selinux_ip_postroute_compat(struct sk_buff *skb,
 						int ifindex,
-						struct avc_audit_data *ad,
-						u16 family,
-						char *addrp,
-						u8 proto)
+						u16 family)
 {
 	struct sock *sk = skb->sk;
 	struct sk_security_struct *sksec;
+	struct avc_audit_data ad;
+	char *addrp;
+	u8 proto;
 
 	if (sk == NULL)
 		return NF_ACCEPT;
 	sksec = sk->sk_security;
 
+	AVC_AUDIT_DATA_INIT(&ad, NET);
+	ad.u.net.netif = ifindex;
+	ad.u.net.family = family;
+	if (selinux_parse_skb(skb, &ad, &addrp, 0, &proto))
+		return NF_DROP;
+
 	if (selinux_compat_net) {
 		if (selinux_ip_postroute_iptables_compat(skb->sk, ifindex,
-							 ad, family, addrp))
+							 &ad, family, addrp))
 			return NF_DROP;
 	} else {
 		if (avc_has_perm(sksec->sid, skb->secmark,
-				 SECCLASS_PACKET, PACKET__SEND, ad))
+				 SECCLASS_PACKET, PACKET__SEND, &ad))
 			return NF_DROP;
 	}
 
 	if (selinux_policycap_netpeer)
-		if (selinux_xfrm_postroute_last(sksec->sid, skb, ad, proto))
+		if (selinux_xfrm_postroute_last(sksec->sid, skb, &ad, proto))
 			return NF_DROP;
 
 	return NF_ACCEPT;
@@ -4542,23 +4562,15 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, int ifindex,
 	struct sock *sk;
 	struct avc_audit_data ad;
 	char *addrp;
-	u8 proto;
 	u8 secmark_active;
 	u8 peerlbl_active;
 
-	AVC_AUDIT_DATA_INIT(&ad, NET);
-	ad.u.net.netif = ifindex;
-	ad.u.net.family = family;
-	if (selinux_parse_skb(skb, &ad, &addrp, 0, &proto))
-		return NF_DROP;
-
 	/* If any sort of compatibility mode is enabled then handoff processing
 	 * to the selinux_ip_postroute_compat() function to deal with the
 	 * special handling.  We do this in an attempt to keep this function
 	 * as fast and as clean as possible. */
 	if (selinux_compat_net || !selinux_policycap_netpeer)
-		return selinux_ip_postroute_compat(skb, ifindex, &ad,
-						   family, addrp, proto);
+		return selinux_ip_postroute_compat(skb, ifindex, family);
 
 	/* If skb->dst->xfrm is non-NULL then the packet is undergoing an IPsec
 	 * packet transformation so allow the packet to pass without any checks
@@ -4574,21 +4586,45 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, int ifindex,
 	if (!secmark_active && !peerlbl_active)
 		return NF_ACCEPT;
 
-	/* if the packet is locally generated (skb->sk != NULL) then use the
-	 * socket's label as the peer label, otherwise the packet is being
-	 * forwarded through this system and we need to fetch the peer label
-	 * directly from the packet */
+	/* if the packet is being forwarded then get the peer label from the
+	 * packet itself; otherwise check to see if it is from a local
+	 * application or the kernel, if from an application get the peer label
+	 * from the sending socket, otherwise use the kernel's sid */
 	sk = skb->sk;
-	if (sk) {
+	if (sk == NULL) {
+		switch (family) {
+		case PF_INET:
+			if (IPCB(skb)->flags & IPSKB_FORWARDED)
+				secmark_perm = PACKET__FORWARD_OUT;
+			else
+				secmark_perm = PACKET__SEND;
+			break;
+		case PF_INET6:
+			if (IP6CB(skb)->flags & IP6SKB_FORWARDED)
+				secmark_perm = PACKET__FORWARD_OUT;
+			else
+				secmark_perm = PACKET__SEND;
+			break;
+		default:
+			return NF_DROP;
+		}
+		if (secmark_perm == PACKET__FORWARD_OUT) {
+			if (selinux_skb_peerlbl_sid(skb, family, &peer_sid))
+				return NF_DROP;
+		} else
+			peer_sid = SECINITSID_KERNEL;
+	} else {
 		struct sk_security_struct *sksec = sk->sk_security;
 		peer_sid = sksec->sid;
 		secmark_perm = PACKET__SEND;
-	} else {
-		if (selinux_skb_peerlbl_sid(skb, family, &peer_sid))
-				return NF_DROP;
-		secmark_perm = PACKET__FORWARD_OUT;
 	}
 
+	AVC_AUDIT_DATA_INIT(&ad, NET);
+	ad.u.net.netif = ifindex;
+	ad.u.net.family = family;
+	if (selinux_parse_skb(skb, &ad, &addrp, 0, NULL))
+		return NF_DROP;
+
 	if (secmark_active)
 		if (avc_has_perm(peer_sid, skb->secmark,
 				 SECCLASS_PACKET, secmark_perm, &ad))
-- 
cgit v1.2.3


From 99d854d231ce141850b988bdc7e2e7c78f49b03a Mon Sep 17 00:00:00 2001
From: Paul Moore <paul.moore@hp.com>
Date: Fri, 10 Oct 2008 10:16:30 -0400
Subject: selinux: Fix a problem in security_netlbl_sid_to_secattr()

Currently when SELinux fails to allocate memory in
security_netlbl_sid_to_secattr() the NetLabel LSM domain field is set to
NULL which triggers the default NetLabel LSM domain mapping which may not
always be the desired mapping.  This patch fixes this by returning an error
when the kernel is unable to allocate memory.  This could result in more
failures on a system with heavy memory pressure but it is the "correct"
thing to do.

Signed-off-by: Paul Moore <paul.moore@hp.com>
Acked-by: James Morris <jmorris@namei.org>
---
 security/selinux/ss/services.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'security')

diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index 8551952ef329..c8f688a10041 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -2785,7 +2785,7 @@ netlbl_secattr_to_sid_return_cleanup:
  */
 int security_netlbl_sid_to_secattr(u32 sid, struct netlbl_lsm_secattr *secattr)
 {
-	int rc = -ENOENT;
+	int rc;
 	struct context *ctx;
 
 	if (!ss_initialized)
@@ -2793,10 +2793,16 @@ int security_netlbl_sid_to_secattr(u32 sid, struct netlbl_lsm_secattr *secattr)
 
 	read_lock(&policy_rwlock);
 	ctx = sidtab_search(&sidtab, sid);
-	if (ctx == NULL)
+	if (ctx == NULL) {
+		rc = -ENOENT;
 		goto netlbl_sid_to_secattr_failure;
+	}
 	secattr->domain = kstrdup(policydb.p_type_val_to_name[ctx->type - 1],
 				  GFP_ATOMIC);
+	if (secattr->domain == NULL) {
+		rc = -ENOMEM;
+		goto netlbl_sid_to_secattr_failure;
+	}
 	secattr->flags |= NETLBL_SECATTR_DOMAIN_CPY;
 	mls_export_netlbl_lvl(ctx, secattr);
 	rc = mls_export_netlbl_cat(ctx, secattr);
-- 
cgit v1.2.3


From dfaebe9825ff34983778f287101bc5f3bce00640 Mon Sep 17 00:00:00 2001
From: Paul Moore <paul.moore@hp.com>
Date: Fri, 10 Oct 2008 10:16:31 -0400
Subject: selinux: Fix missing calls to netlbl_skbuff_err()

At some point I think I messed up and dropped the calls to netlbl_skbuff_err()
which are necessary for CIPSO to send error notifications to remote systems.
This patch re-introduces the error handling calls into the SELinux code.

Signed-off-by: Paul Moore <paul.moore@hp.com>
Acked-by: James Morris <jmorris@namei.org>
---
 include/net/netlabel.h              |  6 ++++--
 net/netlabel/netlabel_kapi.c        |  5 +++--
 security/selinux/hooks.c            | 19 +++++++++++++++----
 security/selinux/include/netlabel.h |  9 +++++++++
 security/selinux/netlabel.c         | 20 +++++++++++++++++++-
 5 files changed, 50 insertions(+), 9 deletions(-)

(limited to 'security')

diff --git a/include/net/netlabel.h b/include/net/netlabel.h
index 5303749b7093..e16db0961265 100644
--- a/include/net/netlabel.h
+++ b/include/net/netlabel.h
@@ -382,7 +382,7 @@ int netlbl_sock_getattr(struct sock *sk,
 int netlbl_skbuff_getattr(const struct sk_buff *skb,
 			  u16 family,
 			  struct netlbl_lsm_secattr *secattr);
-void netlbl_skbuff_err(struct sk_buff *skb, int error);
+void netlbl_skbuff_err(struct sk_buff *skb, int error, int gateway);
 
 /*
  * LSM label mapping cache operations
@@ -454,7 +454,9 @@ static inline int netlbl_skbuff_getattr(const struct sk_buff *skb,
 {
 	return -ENOSYS;
 }
-static inline void netlbl_skbuff_err(struct sk_buff *skb, int error)
+static inline void netlbl_skbuff_err(struct sk_buff *skb,
+				     int error,
+				     int gateway)
 {
 	return;
 }
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 6c211fe97782..22faba620e4b 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -490,6 +490,7 @@ int netlbl_skbuff_getattr(const struct sk_buff *skb,
  * netlbl_skbuff_err - Handle a LSM error on a sk_buff
  * @skb: the packet
  * @error: the error code
+ * @gateway: true if host is acting as a gateway, false otherwise
  *
  * Description:
  * Deal with a LSM problem when handling the packet in @skb, typically this is
@@ -497,10 +498,10 @@ int netlbl_skbuff_getattr(const struct sk_buff *skb,
  * according to the packet's labeling protocol.
  *
  */
-void netlbl_skbuff_err(struct sk_buff *skb, int error)
+void netlbl_skbuff_err(struct sk_buff *skb, int error, int gateway)
 {
 	if (CIPSO_V4_OPTEXIST(skb))
-		cipso_v4_error(skb, error, 0);
+		cipso_v4_error(skb, error, gateway);
 }
 
 /**
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index b520667a24be..a91146a6b37d 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -4101,6 +4101,8 @@ static int selinux_sock_rcv_skb_compat(struct sock *sk, struct sk_buff *skb,
 			return err;
 		err = avc_has_perm(sk_sid, peer_sid,
 				   SECCLASS_PEER, PEER__RECV, &ad);
+		if (err)
+			selinux_netlbl_err(skb, err, 0);
 	} else {
 		err = selinux_netlbl_sock_rcv_skb(sksec, skb, family, &ad);
 		if (err)
@@ -4156,10 +4158,14 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
 			return err;
 		err = selinux_inet_sys_rcv_skb(skb->iif, addrp, family,
 					       peer_sid, &ad);
-		if (err)
+		if (err) {
+			selinux_netlbl_err(skb, err, 0);
 			return err;
+		}
 		err = avc_has_perm(sk_sid, peer_sid, SECCLASS_PEER,
 				   PEER__RECV, &ad);
+		if (err)
+			selinux_netlbl_err(skb, err, 0);
 	}
 
 	if (secmark_active) {
@@ -4396,6 +4402,7 @@ out:
 static unsigned int selinux_ip_forward(struct sk_buff *skb, int ifindex,
 				       u16 family)
 {
+	int err;
 	char *addrp;
 	u32 peer_sid;
 	struct avc_audit_data ad;
@@ -4419,10 +4426,14 @@ static unsigned int selinux_ip_forward(struct sk_buff *skb, int ifindex,
 	if (selinux_parse_skb(skb, &ad, &addrp, 1, NULL) != 0)
 		return NF_DROP;
 
-	if (peerlbl_active)
-		if (selinux_inet_sys_rcv_skb(ifindex, addrp, family,
-					     peer_sid, &ad) != 0)
+	if (peerlbl_active) {
+		err = selinux_inet_sys_rcv_skb(ifindex, addrp, family,
+					       peer_sid, &ad);
+		if (err) {
+			selinux_netlbl_err(skb, err, 1);
 			return NF_DROP;
+		}
+	}
 
 	if (secmark_active)
 		if (avc_has_perm(peer_sid, skb->secmark,
diff --git a/security/selinux/include/netlabel.h b/security/selinux/include/netlabel.h
index 487a7d81fe20..d4e3ac8a7fbf 100644
--- a/security/selinux/include/netlabel.h
+++ b/security/selinux/include/netlabel.h
@@ -39,6 +39,8 @@
 #ifdef CONFIG_NETLABEL
 void selinux_netlbl_cache_invalidate(void);
 
+void selinux_netlbl_err(struct sk_buff *skb, int error, int gateway);
+
 void selinux_netlbl_sk_security_reset(struct sk_security_struct *ssec,
 				      int family);
 
@@ -63,6 +65,13 @@ static inline void selinux_netlbl_cache_invalidate(void)
 	return;
 }
 
+static inline void selinux_netlbl_err(struct sk_buff *skb,
+				      int error,
+				      int gateway)
+{
+	return;
+}
+
 static inline void selinux_netlbl_sk_security_reset(
 					       struct sk_security_struct *ssec,
 					       int family)
diff --git a/security/selinux/netlabel.c b/security/selinux/netlabel.c
index b9ce5fcf3432..4053f7fc95fb 100644
--- a/security/selinux/netlabel.c
+++ b/security/selinux/netlabel.c
@@ -107,6 +107,24 @@ void selinux_netlbl_cache_invalidate(void)
 	netlbl_cache_invalidate();
 }
 
+/**
+ * selinux_netlbl_err - Handle a NetLabel packet error
+ * @skb: the packet
+ * @error: the error code
+ * @gateway: true if host is acting as a gateway, false otherwise
+ *
+ * Description:
+ * When a packet is dropped due to a call to avc_has_perm() pass the error
+ * code to the NetLabel subsystem so any protocol specific processing can be
+ * done.  This is safe to call even if you are unsure if NetLabel labeling is
+ * present on the packet, NetLabel is smart enough to only act when it should.
+ *
+ */
+void selinux_netlbl_err(struct sk_buff *skb, int error, int gateway)
+{
+	netlbl_skbuff_err(skb, error, gateway);
+}
+
 /**
  * selinux_netlbl_sk_security_reset - Reset the NetLabel fields
  * @ssec: the sk_security_struct
@@ -289,7 +307,7 @@ int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec,
 		return 0;
 
 	if (nlbl_sid != SECINITSID_UNLABELED)
-		netlbl_skbuff_err(skb, rc);
+		netlbl_skbuff_err(skb, rc, 0);
 	return rc;
 }
 
-- 
cgit v1.2.3


From a8134296ba9940b5b271d908666e532d34430a3c Mon Sep 17 00:00:00 2001
From: Paul Moore <paul.moore@hp.com>
Date: Fri, 10 Oct 2008 10:16:31 -0400
Subject: smack: Fix missing calls to netlbl_skbuff_err()

Smack needs to call netlbl_skbuff_err() to let NetLabel do the necessary
protocol specific error handling.

Signed-off-by: Paul Moore <paul.moore@hp.com>
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
---
 security/smack/smack_lsm.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'security')

diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 87d75417ea93..6e2dc0bab70d 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -2179,7 +2179,10 @@ static int smack_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	 * This is the simplist possible security model
 	 * for networking.
 	 */
-	return smk_access(smack, ssp->smk_in, MAY_WRITE);
+	rc = smk_access(smack, ssp->smk_in, MAY_WRITE);
+	if (rc != 0)
+		netlbl_skbuff_err(skb, rc, 0);
+	return rc;
 }
 
 /**
-- 
cgit v1.2.3


From b1edeb102397546438ab4624489c6ccd7b410d97 Mon Sep 17 00:00:00 2001
From: Paul Moore <paul.moore@hp.com>
Date: Fri, 10 Oct 2008 10:16:31 -0400
Subject: netlabel: Replace protocol/NetLabel linking with refrerence counts

NetLabel has always had a list of backpointers in the CIPSO DOI definition
structure which pointed to the NetLabel LSM domain mapping structures which
referenced the CIPSO DOI struct.  The rationale for this was that when an
administrator removed a CIPSO DOI from the system all of the associated
NetLabel LSM domain mappings should be removed as well; a list of
backpointers made this a simple operation.

Unfortunately, while the backpointers did make the removal easier they were
a bit of a mess from an implementation point of view which was making
further development difficult.  Since the removal of a CIPSO DOI is a
realtively rare event it seems to make sense to remove this backpointer
list as the optimization was hurting us more then it was helping.  However,
we still need to be able to track when a CIPSO DOI definition is being used
so replace the backpointer list with a reference count.  In order to
preserve the current functionality of removing the associated LSM domain
mappings when a CIPSO DOI is removed we walk the LSM domain mapping table,
removing the relevant entries.

Signed-off-by: Paul Moore <paul.moore@hp.com>
Reviewed-by: James Morris <jmorris@namei.org>
---
 include/net/cipso_ipv4.h           |  21 ++--
 net/ipv4/cipso_ipv4.c              | 235 ++++++++++++++++---------------------
 net/netlabel/netlabel_cipso_v4.c   |  77 ++++++------
 net/netlabel/netlabel_domainhash.c |  95 ++++++++-------
 net/netlabel/netlabel_domainhash.h |   2 +
 net/netlabel/netlabel_kapi.c       |  43 ++++---
 net/netlabel/netlabel_mgmt.c       |  24 +---
 security/smack/smackfs.c           |   4 +-
 8 files changed, 235 insertions(+), 266 deletions(-)

(limited to 'security')

diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h
index a6bb94530cfd..5fe6556fb3c5 100644
--- a/include/net/cipso_ipv4.h
+++ b/include/net/cipso_ipv4.h
@@ -40,6 +40,7 @@
 #include <linux/net.h>
 #include <linux/skbuff.h>
 #include <net/netlabel.h>
+#include <asm/atomic.h>
 
 /* known doi values */
 #define CIPSO_V4_DOI_UNKNOWN          0x00000000
@@ -79,10 +80,9 @@ struct cipso_v4_doi {
 	} map;
 	u8 tags[CIPSO_V4_TAG_MAXCNT];
 
-	u32 valid;
+	atomic_t refcount;
 	struct list_head list;
 	struct rcu_head rcu;
-	struct list_head dom_list;
 };
 
 /* Standard CIPSO mapping table */
@@ -128,25 +128,26 @@ extern int cipso_v4_rbm_strictvalid;
 
 #ifdef CONFIG_NETLABEL
 int cipso_v4_doi_add(struct cipso_v4_doi *doi_def);
-int cipso_v4_doi_remove(u32 doi,
-			struct netlbl_audit *audit_info,
-			void (*callback) (struct rcu_head * head));
+void cipso_v4_doi_free(struct cipso_v4_doi *doi_def);
+int cipso_v4_doi_remove(u32 doi, struct netlbl_audit *audit_info);
 struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi);
+void cipso_v4_doi_putdef(struct cipso_v4_doi *doi_def);
 int cipso_v4_doi_walk(u32 *skip_cnt,
 		     int (*callback) (struct cipso_v4_doi *doi_def, void *arg),
 	             void *cb_arg);
-int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, const char *domain);
-int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def,
-			       const char *domain);
 #else
 static inline int cipso_v4_doi_add(struct cipso_v4_doi *doi_def)
 {
 	return -ENOSYS;
 }
 
+static inline void cipso_v4_doi_free(struct cipso_v4_doi *doi_def)
+{
+	return;
+}
+
 static inline int cipso_v4_doi_remove(u32 doi,
-				    struct netlbl_audit *audit_info,
-				    void (*callback) (struct rcu_head * head))
+				      struct netlbl_audit *audit_info)
 {
 	return 0;
 }
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 2c0e4572cc90..bf87eddfec30 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -47,17 +47,7 @@
 #include <asm/bug.h>
 #include <asm/unaligned.h>
 
-struct cipso_v4_domhsh_entry {
-	char *domain;
-	u32 valid;
-	struct list_head list;
-	struct rcu_head rcu;
-};
-
 /* List of available DOI definitions */
-/* XXX - Updates should be minimal so having a single lock for the
- * cipso_v4_doi_list and the cipso_v4_doi_list->dom_list should be
- * okay. */
 /* XXX - This currently assumes a minimal number of different DOIs in use,
  * if in practice there are a lot of different DOIs this list should
  * probably be turned into a hash table or something similar so we
@@ -193,25 +183,6 @@ static void cipso_v4_bitmap_setbit(unsigned char *bitmap,
 		bitmap[byte_spot] &= ~bitmask;
 }
 
-/**
- * cipso_v4_doi_domhsh_free - Frees a domain list entry
- * @entry: the entry's RCU field
- *
- * Description:
- * This function is designed to be used as a callback to the call_rcu()
- * function so that the memory allocated to a domain list entry can be released
- * safely.
- *
- */
-static void cipso_v4_doi_domhsh_free(struct rcu_head *entry)
-{
-	struct cipso_v4_domhsh_entry *ptr;
-
-	ptr = container_of(entry, struct cipso_v4_domhsh_entry, rcu);
-	kfree(ptr->domain);
-	kfree(ptr);
-}
-
 /**
  * cipso_v4_cache_entry_free - Frees a cache entry
  * @entry: the entry to free
@@ -457,7 +428,7 @@ static struct cipso_v4_doi *cipso_v4_doi_search(u32 doi)
 	struct cipso_v4_doi *iter;
 
 	list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list)
-		if (iter->doi == doi && iter->valid)
+		if (iter->doi == doi && atomic_read(&iter->refcount))
 			return iter;
 	return NULL;
 }
@@ -501,9 +472,8 @@ int cipso_v4_doi_add(struct cipso_v4_doi *doi_def)
 		}
 	}
 
-	doi_def->valid = 1;
+	atomic_set(&doi_def->refcount, 1);
 	INIT_RCU_HEAD(&doi_def->rcu);
-	INIT_LIST_HEAD(&doi_def->dom_list);
 
 	spin_lock(&cipso_v4_doi_list_lock);
 	if (cipso_v4_doi_search(doi_def->doi) != NULL)
@@ -518,60 +488,130 @@ doi_add_failure:
 	return -EEXIST;
 }
 
+/**
+ * cipso_v4_doi_free - Frees a DOI definition
+ * @entry: the entry's RCU field
+ *
+ * Description:
+ * This function frees all of the memory associated with a DOI definition.
+ *
+ */
+void cipso_v4_doi_free(struct cipso_v4_doi *doi_def)
+{
+	if (doi_def == NULL)
+		return;
+
+	switch (doi_def->type) {
+	case CIPSO_V4_MAP_STD:
+		kfree(doi_def->map.std->lvl.cipso);
+		kfree(doi_def->map.std->lvl.local);
+		kfree(doi_def->map.std->cat.cipso);
+		kfree(doi_def->map.std->cat.local);
+		break;
+	}
+	kfree(doi_def);
+}
+
+/**
+ * cipso_v4_doi_free_rcu - Frees a DOI definition via the RCU pointer
+ * @entry: the entry's RCU field
+ *
+ * Description:
+ * This function is designed to be used as a callback to the call_rcu()
+ * function so that the memory allocated to the DOI definition can be released
+ * safely.
+ *
+ */
+static void cipso_v4_doi_free_rcu(struct rcu_head *entry)
+{
+	struct cipso_v4_doi *doi_def;
+
+	doi_def = container_of(entry, struct cipso_v4_doi, rcu);
+	cipso_v4_doi_free(doi_def);
+}
+
 /**
  * cipso_v4_doi_remove - Remove an existing DOI from the CIPSO protocol engine
  * @doi: the DOI value
  * @audit_secid: the LSM secid to use in the audit message
- * @callback: the DOI cleanup/free callback
  *
  * Description:
- * Removes a DOI definition from the CIPSO engine, @callback is called to
- * free any memory.  The NetLabel routines will be called to release their own
- * LSM domain mappings as well as our own domain list.  Returns zero on
- * success and negative values on failure.
+ * Removes a DOI definition from the CIPSO engine.  The NetLabel routines will
+ * be called to release their own LSM domain mappings as well as our own
+ * domain list.  Returns zero on success and negative values on failure.
  *
  */
-int cipso_v4_doi_remove(u32 doi,
-			struct netlbl_audit *audit_info,
-			void (*callback) (struct rcu_head * head))
+int cipso_v4_doi_remove(u32 doi, struct netlbl_audit *audit_info)
 {
 	struct cipso_v4_doi *doi_def;
-	struct cipso_v4_domhsh_entry *dom_iter;
 
 	spin_lock(&cipso_v4_doi_list_lock);
 	doi_def = cipso_v4_doi_search(doi);
-	if (doi_def != NULL) {
-		doi_def->valid = 0;
-		list_del_rcu(&doi_def->list);
+	if (doi_def == NULL) {
 		spin_unlock(&cipso_v4_doi_list_lock);
-		rcu_read_lock();
-		list_for_each_entry_rcu(dom_iter, &doi_def->dom_list, list)
-			if (dom_iter->valid)
-				netlbl_cfg_map_del(dom_iter->domain,
-						   audit_info);
-		rcu_read_unlock();
-		cipso_v4_cache_invalidate();
-		call_rcu(&doi_def->rcu, callback);
-		return 0;
+		return -ENOENT;
+	}
+	if (!atomic_dec_and_test(&doi_def->refcount)) {
+		spin_unlock(&cipso_v4_doi_list_lock);
+		return -EBUSY;
 	}
+	list_del_rcu(&doi_def->list);
 	spin_unlock(&cipso_v4_doi_list_lock);
 
-	return -ENOENT;
+	cipso_v4_cache_invalidate();
+	call_rcu(&doi_def->rcu, cipso_v4_doi_free_rcu);
+
+	return 0;
 }
 
 /**
- * cipso_v4_doi_getdef - Returns a pointer to a valid DOI definition
+ * cipso_v4_doi_getdef - Returns a reference to a valid DOI definition
  * @doi: the DOI value
  *
  * Description:
  * Searches for a valid DOI definition and if one is found it is returned to
  * the caller.  Otherwise NULL is returned.  The caller must ensure that
- * rcu_read_lock() is held while accessing the returned definition.
+ * rcu_read_lock() is held while accessing the returned definition and the DOI
+ * definition reference count is decremented when the caller is done.
  *
  */
 struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi)
 {
-	return cipso_v4_doi_search(doi);
+	struct cipso_v4_doi *doi_def;
+
+	rcu_read_lock();
+	doi_def = cipso_v4_doi_search(doi);
+	if (doi_def == NULL)
+		goto doi_getdef_return;
+	if (!atomic_inc_not_zero(&doi_def->refcount))
+		doi_def = NULL;
+
+doi_getdef_return:
+	rcu_read_unlock();
+	return doi_def;
+}
+
+/**
+ * cipso_v4_doi_putdef - Releases a reference for the given DOI definition
+ * @doi_def: the DOI definition
+ *
+ * Description:
+ * Releases a DOI definition reference obtained from cipso_v4_doi_getdef().
+ *
+ */
+void cipso_v4_doi_putdef(struct cipso_v4_doi *doi_def)
+{
+	if (doi_def == NULL)
+		return;
+
+	if (!atomic_dec_and_test(&doi_def->refcount))
+		return;
+	spin_lock(&cipso_v4_doi_list_lock);
+	list_del_rcu(&doi_def->list);
+	spin_unlock(&cipso_v4_doi_list_lock);
+
+	cipso_v4_cache_invalidate();
+	call_rcu(&doi_def->rcu, cipso_v4_doi_free_rcu);
 }
 
 /**
@@ -597,7 +637,7 @@ int cipso_v4_doi_walk(u32 *skip_cnt,
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(iter_doi, &cipso_v4_doi_list, list)
-		if (iter_doi->valid) {
+		if (atomic_read(&iter_doi->refcount) > 0) {
 			if (doi_cnt++ < *skip_cnt)
 				continue;
 			ret_val = callback(iter_doi, cb_arg);
@@ -613,85 +653,6 @@ doi_walk_return:
 	return ret_val;
 }
 
-/**
- * cipso_v4_doi_domhsh_add - Adds a domain entry to a DOI definition
- * @doi_def: the DOI definition
- * @domain: the domain to add
- *
- * Description:
- * Adds the @domain to the DOI specified by @doi_def, this function
- * should only be called by external functions (i.e. NetLabel).  This function
- * does allocate memory.  Returns zero on success, negative values on failure.
- *
- */
-int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, const char *domain)
-{
-	struct cipso_v4_domhsh_entry *iter;
-	struct cipso_v4_domhsh_entry *new_dom;
-
-	new_dom = kzalloc(sizeof(*new_dom), GFP_KERNEL);
-	if (new_dom == NULL)
-		return -ENOMEM;
-	if (domain) {
-		new_dom->domain = kstrdup(domain, GFP_KERNEL);
-		if (new_dom->domain == NULL) {
-			kfree(new_dom);
-			return -ENOMEM;
-		}
-	}
-	new_dom->valid = 1;
-	INIT_RCU_HEAD(&new_dom->rcu);
-
-	spin_lock(&cipso_v4_doi_list_lock);
-	list_for_each_entry(iter, &doi_def->dom_list, list)
-		if (iter->valid &&
-		    ((domain != NULL && iter->domain != NULL &&
-		      strcmp(iter->domain, domain) == 0) ||
-		     (domain == NULL && iter->domain == NULL))) {
-			spin_unlock(&cipso_v4_doi_list_lock);
-			kfree(new_dom->domain);
-			kfree(new_dom);
-			return -EEXIST;
-		}
-	list_add_tail_rcu(&new_dom->list, &doi_def->dom_list);
-	spin_unlock(&cipso_v4_doi_list_lock);
-
-	return 0;
-}
-
-/**
- * cipso_v4_doi_domhsh_remove - Removes a domain entry from a DOI definition
- * @doi_def: the DOI definition
- * @domain: the domain to remove
- *
- * Description:
- * Removes the @domain from the DOI specified by @doi_def, this function
- * should only be called by external functions (i.e. NetLabel).   Returns zero
- * on success and negative values on error.
- *
- */
-int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def,
-			       const char *domain)
-{
-	struct cipso_v4_domhsh_entry *iter;
-
-	spin_lock(&cipso_v4_doi_list_lock);
-	list_for_each_entry(iter, &doi_def->dom_list, list)
-		if (iter->valid &&
-		    ((domain != NULL && iter->domain != NULL &&
-		      strcmp(iter->domain, domain) == 0) ||
-		     (domain == NULL && iter->domain == NULL))) {
-			iter->valid = 0;
-			list_del_rcu(&iter->list);
-			spin_unlock(&cipso_v4_doi_list_lock);
-			call_rcu(&iter->rcu, cipso_v4_doi_domhsh_free);
-			return 0;
-		}
-	spin_unlock(&cipso_v4_doi_list_lock);
-
-	return -ENOENT;
-}
-
 /*
  * Label Mapping Functions
  */
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index aaf50032b3ac..5c4f60bbc82d 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -43,6 +43,7 @@
 #include "netlabel_user.h"
 #include "netlabel_cipso_v4.h"
 #include "netlabel_mgmt.h"
+#include "netlabel_domainhash.h"
 
 /* Argument struct for cipso_v4_doi_walk() */
 struct netlbl_cipsov4_doiwalk_arg {
@@ -51,6 +52,12 @@ struct netlbl_cipsov4_doiwalk_arg {
 	u32 seq;
 };
 
+/* Argument struct for netlbl_domhsh_walk() */
+struct netlbl_domhsh_walk_arg {
+	struct netlbl_audit *audit_info;
+	u32 doi;
+};
+
 /* NetLabel Generic NETLINK CIPSOv4 family */
 static struct genl_family netlbl_cipsov4_gnl_family = {
 	.id = GENL_ID_GENERATE,
@@ -80,32 +87,6 @@ static const struct nla_policy netlbl_cipsov4_genl_policy[NLBL_CIPSOV4_A_MAX + 1
  * Helper Functions
  */
 
-/**
- * netlbl_cipsov4_doi_free - Frees a CIPSO V4 DOI definition
- * @entry: the entry's RCU field
- *
- * Description:
- * This function is designed to be used as a callback to the call_rcu()
- * function so that the memory allocated to the DOI definition can be released
- * safely.
- *
- */
-void netlbl_cipsov4_doi_free(struct rcu_head *entry)
-{
-	struct cipso_v4_doi *ptr;
-
-	ptr = container_of(entry, struct cipso_v4_doi, rcu);
-	switch (ptr->type) {
-	case CIPSO_V4_MAP_STD:
-		kfree(ptr->map.std->lvl.cipso);
-		kfree(ptr->map.std->lvl.local);
-		kfree(ptr->map.std->cat.cipso);
-		kfree(ptr->map.std->cat.local);
-		break;
-	}
-	kfree(ptr);
-}
-
 /**
  * netlbl_cipsov4_add_common - Parse the common sections of a ADD message
  * @info: the Generic NETLINK info block
@@ -342,7 +323,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info)
 
 add_std_failure:
 	if (doi_def)
-		netlbl_cipsov4_doi_free(&doi_def->rcu);
+		cipso_v4_doi_free(doi_def);
 	return ret_val;
 }
 
@@ -379,7 +360,7 @@ static int netlbl_cipsov4_add_pass(struct genl_info *info)
 	return 0;
 
 add_pass_failure:
-	netlbl_cipsov4_doi_free(&doi_def->rcu);
+	cipso_v4_doi_free(doi_def);
 	return ret_val;
 }
 
@@ -667,6 +648,29 @@ static int netlbl_cipsov4_listall(struct sk_buff *skb,
 	return skb->len;
 }
 
+/**
+ * netlbl_cipsov4_remove_cb - netlbl_cipsov4_remove() callback for REMOVE
+ * @entry: LSM domain mapping entry
+ * @arg: the netlbl_domhsh_walk_arg structure
+ *
+ * Description:
+ * This function is intended for use by netlbl_cipsov4_remove() as the callback
+ * for the netlbl_domhsh_walk() function; it removes LSM domain map entries
+ * which are associated with the CIPSO DOI specified in @arg.  Returns zero on
+ * success, negative values on failure.
+ *
+ */
+static int netlbl_cipsov4_remove_cb(struct netlbl_dom_map *entry, void *arg)
+{
+	struct netlbl_domhsh_walk_arg *cb_arg = arg;
+
+	if (entry->type == NETLBL_NLTYPE_CIPSOV4 &&
+	    entry->type_def.cipsov4->doi == cb_arg->doi)
+		return netlbl_domhsh_remove_entry(entry, cb_arg->audit_info);
+
+	return 0;
+}
+
 /**
  * netlbl_cipsov4_remove - Handle a REMOVE message
  * @skb: the NETLINK buffer
@@ -681,8 +685,11 @@ static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info)
 {
 	int ret_val = -EINVAL;
 	u32 doi = 0;
+	struct netlbl_domhsh_walk_arg cb_arg;
 	struct audit_buffer *audit_buf;
 	struct netlbl_audit audit_info;
+	u32 skip_bkt = 0;
+	u32 skip_chain = 0;
 
 	if (!info->attrs[NLBL_CIPSOV4_A_DOI])
 		return -EINVAL;
@@ -690,11 +697,15 @@ static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info)
 	doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]);
 	netlbl_netlink_auditinfo(skb, &audit_info);
 
-	ret_val = cipso_v4_doi_remove(doi,
-				      &audit_info,
-				      netlbl_cipsov4_doi_free);
-	if (ret_val == 0)
-		atomic_dec(&netlabel_mgmt_protocount);
+	cb_arg.doi = doi;
+	cb_arg.audit_info = &audit_info;
+	ret_val = netlbl_domhsh_walk(&skip_bkt, &skip_chain,
+				     netlbl_cipsov4_remove_cb, &cb_arg);
+	if (ret_val == 0 || ret_val == -ENOENT) {
+		ret_val = cipso_v4_doi_remove(doi, &audit_info);
+		if (ret_val == 0)
+			atomic_dec(&netlabel_mgmt_protocount);
+	}
 
 	audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_DEL,
 					      &audit_info);
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
index dc42206c4312..0243f0c57b41 100644
--- a/net/netlabel/netlabel_domainhash.c
+++ b/net/netlabel/netlabel_domainhash.c
@@ -217,20 +217,6 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry,
 	u32 bkt;
 	struct audit_buffer *audit_buf;
 
-	switch (entry->type) {
-	case NETLBL_NLTYPE_UNLABELED:
-		ret_val = 0;
-		break;
-	case NETLBL_NLTYPE_CIPSOV4:
-		ret_val = cipso_v4_doi_domhsh_add(entry->type_def.cipsov4,
-						  entry->domain);
-		break;
-	default:
-		return -EINVAL;
-	}
-	if (ret_val != 0)
-		return ret_val;
-
 	entry->valid = 1;
 	INIT_RCU_HEAD(&entry->rcu);
 
@@ -271,16 +257,6 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry,
 	}
 	rcu_read_unlock();
 
-	if (ret_val != 0) {
-		switch (entry->type) {
-		case NETLBL_NLTYPE_CIPSOV4:
-			if (cipso_v4_doi_domhsh_remove(entry->type_def.cipsov4,
-						       entry->domain) != 0)
-				BUG();
-			break;
-		}
-	}
-
 	return ret_val;
 }
 
@@ -302,35 +278,26 @@ int netlbl_domhsh_add_default(struct netlbl_dom_map *entry,
 }
 
 /**
- * netlbl_domhsh_remove - Removes an entry from the domain hash table
- * @domain: the domain to remove
+ * netlbl_domhsh_remove_entry - Removes a given entry from the domain table
+ * @entry: the entry to remove
  * @audit_info: NetLabel audit information
  *
  * Description:
  * Removes an entry from the domain hash table and handles any updates to the
- * lower level protocol handler (i.e. CIPSO).  Returns zero on success,
- * negative on failure.
+ * lower level protocol handler (i.e. CIPSO).  Caller is responsible for
+ * ensuring that the RCU read lock is held.  Returns zero on success, negative
+ * on failure.
  *
  */
-int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info)
+int netlbl_domhsh_remove_entry(struct netlbl_dom_map *entry,
+			       struct netlbl_audit *audit_info)
 {
-	int ret_val = -ENOENT;
-	struct netlbl_dom_map *entry;
+	int ret_val = 0;
 	struct audit_buffer *audit_buf;
 
-	rcu_read_lock();
-	if (domain)
-		entry = netlbl_domhsh_search(domain);
-	else
-		entry = netlbl_domhsh_search_def(domain);
 	if (entry == NULL)
-		goto remove_return;
-	switch (entry->type) {
-	case NETLBL_NLTYPE_CIPSOV4:
-		cipso_v4_doi_domhsh_remove(entry->type_def.cipsov4,
-					   entry->domain);
-		break;
-	}
+		return -ENOENT;
+
 	spin_lock(&netlbl_domhsh_lock);
 	if (entry->valid) {
 		entry->valid = 0;
@@ -338,8 +305,8 @@ int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info)
 			list_del_rcu(&entry->list);
 		else
 			rcu_assign_pointer(netlbl_domhsh_def, NULL);
-		ret_val = 0;
-	}
+	} else
+		ret_val = -ENOENT;
 	spin_unlock(&netlbl_domhsh_lock);
 
 	audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_DEL, audit_info);
@@ -351,10 +318,42 @@ int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info)
 		audit_log_end(audit_buf);
 	}
 
-remove_return:
-	rcu_read_unlock();
-	if (ret_val == 0)
+	if (ret_val == 0) {
+		switch (entry->type) {
+		case NETLBL_NLTYPE_CIPSOV4:
+			cipso_v4_doi_putdef(entry->type_def.cipsov4);
+			break;
+		}
 		call_rcu(&entry->rcu, netlbl_domhsh_free_entry);
+	}
+
+	return ret_val;
+}
+
+/**
+ * netlbl_domhsh_remove - Removes an entry from the domain hash table
+ * @domain: the domain to remove
+ * @audit_info: NetLabel audit information
+ *
+ * Description:
+ * Removes an entry from the domain hash table and handles any updates to the
+ * lower level protocol handler (i.e. CIPSO).  Returns zero on success,
+ * negative on failure.
+ *
+ */
+int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info)
+{
+	int ret_val;
+	struct netlbl_dom_map *entry;
+
+	rcu_read_lock();
+	if (domain)
+		entry = netlbl_domhsh_search(domain);
+	else
+		entry = netlbl_domhsh_search_def(domain);
+	ret_val = netlbl_domhsh_remove_entry(entry, audit_info);
+	rcu_read_unlock();
+
 	return ret_val;
 }
 
diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h
index 8220990ceb96..afcc41a7432d 100644
--- a/net/netlabel/netlabel_domainhash.h
+++ b/net/netlabel/netlabel_domainhash.h
@@ -61,6 +61,8 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry,
 		      struct netlbl_audit *audit_info);
 int netlbl_domhsh_add_default(struct netlbl_dom_map *entry,
 			      struct netlbl_audit *audit_info);
+int netlbl_domhsh_remove_entry(struct netlbl_dom_map *entry,
+			       struct netlbl_audit *audit_info);
 int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info);
 int netlbl_domhsh_remove_default(struct netlbl_audit *audit_info);
 struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain);
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 22faba620e4b..7d8ecea93914 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -121,10 +121,15 @@ int netlbl_cfg_cipsov4_add_map(struct cipso_v4_doi *doi_def,
 			       struct netlbl_audit *audit_info)
 {
 	int ret_val = -ENOMEM;
+	u32 doi;
+	u32 doi_type;
 	struct netlbl_dom_map *entry;
 	const char *type_str;
 	struct audit_buffer *audit_buf;
 
+	doi = doi_def->doi;
+	doi_type = doi_def->type;
+
 	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
 	if (entry == NULL)
 		return -ENOMEM;
@@ -133,32 +138,25 @@ int netlbl_cfg_cipsov4_add_map(struct cipso_v4_doi *doi_def,
 		if (entry->domain == NULL)
 			goto cfg_cipsov4_add_map_failure;
 	}
-	entry->type = NETLBL_NLTYPE_CIPSOV4;
-	entry->type_def.cipsov4 = doi_def;
-
-	/* Grab a RCU read lock here so nothing happens to the doi_def variable
-	 * between adding it to the CIPSOv4 protocol engine and adding a
-	 * domain mapping for it. */
 
-	rcu_read_lock();
 	ret_val = cipso_v4_doi_add(doi_def);
 	if (ret_val != 0)
-		goto cfg_cipsov4_add_map_failure_unlock;
+		goto cfg_cipsov4_add_map_failure_remove_doi;
+	entry->type = NETLBL_NLTYPE_CIPSOV4;
+	entry->type_def.cipsov4 = cipso_v4_doi_getdef(doi);
+	if (entry->type_def.cipsov4 == NULL) {
+		ret_val = -ENOENT;
+		goto cfg_cipsov4_add_map_failure_remove_doi;
+	}
 	ret_val = netlbl_domhsh_add(entry, audit_info);
 	if (ret_val != 0)
-		goto cfg_cipsov4_add_map_failure_remove_doi;
-	rcu_read_unlock();
-
-	return 0;
+		goto cfg_cipsov4_add_map_failure_release_doi;
 
-cfg_cipsov4_add_map_failure_remove_doi:
-	cipso_v4_doi_remove(doi_def->doi, audit_info, netlbl_cipsov4_doi_free);
-cfg_cipsov4_add_map_failure_unlock:
-	rcu_read_unlock();
+cfg_cipsov4_add_map_return:
 	audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_ADD,
 					      audit_info);
 	if (audit_buf != NULL) {
-		switch (doi_def->type) {
+		switch (doi_type) {
 		case CIPSO_V4_MAP_STD:
 			type_str = "std";
 			break;
@@ -170,14 +168,21 @@ cfg_cipsov4_add_map_failure_unlock:
 		}
 		audit_log_format(audit_buf,
 				 " cipso_doi=%u cipso_type=%s res=%u",
-				 doi_def->doi, type_str, ret_val == 0 ? 1 : 0);
+				 doi, type_str, ret_val == 0 ? 1 : 0);
 		audit_log_end(audit_buf);
 	}
+
+	return ret_val;
+
+cfg_cipsov4_add_map_failure_release_doi:
+	cipso_v4_doi_putdef(doi_def);
+cfg_cipsov4_add_map_failure_remove_doi:
+	cipso_v4_doi_remove(doi, audit_info);
 cfg_cipsov4_add_map_failure:
 	if (entry != NULL)
 		kfree(entry->domain);
 	kfree(entry);
-	return ret_val;
+	goto cfg_cipsov4_add_map_return;
 }
 
 /*
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index 44be5d5261f4..c4e18c7bc0c1 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -122,18 +122,12 @@ static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info)
 			goto add_failure;
 
 		tmp_val = nla_get_u32(info->attrs[NLBL_MGMT_A_CV4DOI]);
-		/* We should be holding a rcu_read_lock() here while we hold
-		 * the result but since the entry will always be deleted when
-		 * the CIPSO DOI is deleted we aren't going to keep the
-		 * lock. */
-		rcu_read_lock();
 		entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val);
-		if (entry->type_def.cipsov4 == NULL) {
-			rcu_read_unlock();
+		if (entry->type_def.cipsov4 == NULL)
 			goto add_failure;
-		}
 		ret_val = netlbl_domhsh_add(entry, &audit_info);
-		rcu_read_unlock();
+		if (ret_val != 0)
+			cipso_v4_doi_putdef(entry->type_def.cipsov4);
 		break;
 	default:
 		goto add_failure;
@@ -294,18 +288,12 @@ static int netlbl_mgmt_adddef(struct sk_buff *skb, struct genl_info *info)
 			goto adddef_failure;
 
 		tmp_val = nla_get_u32(info->attrs[NLBL_MGMT_A_CV4DOI]);
-		/* We should be holding a rcu_read_lock() here while we hold
-		 * the result but since the entry will always be deleted when
-		 * the CIPSO DOI is deleted we aren't going to keep the
-		 * lock. */
-		rcu_read_lock();
 		entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val);
-		if (entry->type_def.cipsov4 == NULL) {
-			rcu_read_unlock();
+		if (entry->type_def.cipsov4 == NULL)
 			goto adddef_failure;
-		}
 		ret_val = netlbl_domhsh_add_default(entry, &audit_info);
-		rcu_read_unlock();
+		if (ret_val != 0)
+			cipso_v4_doi_putdef(entry->type_def.cipsov4);
 		break;
 	default:
 		goto adddef_failure;
diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c
index 271a835fbbe3..9733f8eb1a2a 100644
--- a/security/smack/smackfs.c
+++ b/security/smack/smackfs.c
@@ -343,9 +343,11 @@ static void smk_cipso_doi(void)
 		doip->tags[rc] = CIPSO_V4_TAG_INVALID;
 
 	rc = netlbl_cfg_cipsov4_add_map(doip, NULL, &audit_info);
-	if (rc != 0)
+	if (rc != 0) {
 		printk(KERN_WARNING "%s:%d add rc = %d\n",
 		       __func__, __LINE__, rc);
+		kfree(doip);
+	}
 }
 
 /**
-- 
cgit v1.2.3


From 948bf85c1bc9a84754786a9d5dd99b7ecc46451e Mon Sep 17 00:00:00 2001
From: Paul Moore <paul.moore@hp.com>
Date: Fri, 10 Oct 2008 10:16:32 -0400
Subject: netlabel: Add functionality to set the security attributes of a
 packet

This patch builds upon the new NetLabel address selector functionality by
providing the NetLabel KAPI and CIPSO engine support needed to enable the
new packet-based labeling.  The only new addition to the NetLabel KAPI at
this point is shown below:

 * int netlbl_skbuff_setattr(skb, family, secattr)

... and is designed to be called from a Netfilter hook after the packet's
IP header has been populated such as in the FORWARD or LOCAL_OUT hooks.

This patch also provides the necessary SELinux hooks to support this new
functionality.  Smack support is not currently included due to uncertainty
regarding the permissions needed to expand the Smack network access controls.

Signed-off-by: Paul Moore <paul.moore@hp.com>
Reviewed-by: James Morris <jmorris@namei.org>
---
 include/net/cipso_ipv4.h            |  16 +++
 include/net/netlabel.h              |   9 ++
 net/ipv4/cipso_ipv4.c               | 222 +++++++++++++++++++++++++++++-------
 net/netlabel/netlabel_kapi.c        |  60 ++++++++++
 security/selinux/hooks.c            |  50 +++++++-
 security/selinux/include/netlabel.h |   9 ++
 security/selinux/include/objsec.h   |   1 +
 security/selinux/netlabel.c         |  68 ++++++++++-
 8 files changed, 393 insertions(+), 42 deletions(-)

(limited to 'security')

diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h
index 5fe6556fb3c5..2ce093ba553d 100644
--- a/include/net/cipso_ipv4.h
+++ b/include/net/cipso_ipv4.h
@@ -208,6 +208,10 @@ int cipso_v4_sock_setattr(struct sock *sk,
 			  const struct cipso_v4_doi *doi_def,
 			  const struct netlbl_lsm_secattr *secattr);
 int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr);
+int cipso_v4_skbuff_setattr(struct sk_buff *skb,
+			    const struct cipso_v4_doi *doi_def,
+			    const struct netlbl_lsm_secattr *secattr);
+int cipso_v4_skbuff_delattr(struct sk_buff *skb);
 int cipso_v4_skbuff_getattr(const struct sk_buff *skb,
 			    struct netlbl_lsm_secattr *secattr);
 int cipso_v4_validate(unsigned char **option);
@@ -232,6 +236,18 @@ static inline int cipso_v4_sock_getattr(struct sock *sk,
 	return -ENOSYS;
 }
 
+static inline int cipso_v4_skbuff_setattr(struct sk_buff *skb,
+				      const struct cipso_v4_doi *doi_def,
+				      const struct netlbl_lsm_secattr *secattr)
+{
+	return -ENOSYS;
+}
+
+static inline int cipso_v4_skbuff_delattr(struct sk_buff *skb)
+{
+	return -ENOSYS;
+}
+
 static inline int cipso_v4_skbuff_getattr(const struct sk_buff *skb,
 					  struct netlbl_lsm_secattr *secattr)
 {
diff --git a/include/net/netlabel.h b/include/net/netlabel.h
index 0729f8ce5042..3f67e6d49e40 100644
--- a/include/net/netlabel.h
+++ b/include/net/netlabel.h
@@ -382,6 +382,9 @@ int netlbl_sock_setattr(struct sock *sk,
 			const struct netlbl_lsm_secattr *secattr);
 int netlbl_sock_getattr(struct sock *sk,
 			struct netlbl_lsm_secattr *secattr);
+int netlbl_skbuff_setattr(struct sk_buff *skb,
+			  u16 family,
+			  const struct netlbl_lsm_secattr *secattr);
 int netlbl_skbuff_getattr(const struct sk_buff *skb,
 			  u16 family,
 			  struct netlbl_lsm_secattr *secattr);
@@ -451,6 +454,12 @@ static inline int netlbl_sock_getattr(struct sock *sk,
 {
 	return -ENOSYS;
 }
+static inline int netlbl_skbuff_setattr(struct sk_buff *skb,
+				      u16 family,
+				      const struct netlbl_lsm_secattr *secattr)
+{
+	return -ENOSYS;
+}
 static inline int netlbl_skbuff_getattr(const struct sk_buff *skb,
 					u16 family,
 					struct netlbl_lsm_secattr *secattr)
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index bf87eddfec30..e13d6dbb66ab 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -13,7 +13,7 @@
  */
 
 /*
- * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008
  *
  * This program is free software;  you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -1665,48 +1665,27 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway)
 }
 
 /**
- * cipso_v4_sock_setattr - Add a CIPSO option to a socket
- * @sk: the socket
+ * cipso_v4_genopt - Generate a CIPSO option
+ * @buf: the option buffer
+ * @buf_len: the size of opt_buf
  * @doi_def: the CIPSO DOI to use
- * @secattr: the specific security attributes of the socket
+ * @secattr: the security attributes
  *
  * Description:
- * Set the CIPSO option on the given socket using the DOI definition and
- * security attributes passed to the function.  This function requires
- * exclusive access to @sk, which means it either needs to be in the
- * process of being created or locked.  Returns zero on success and negative
- * values on failure.
+ * Generate a CIPSO option using the DOI definition and security attributes
+ * passed to the function.  Returns the length of the option on success and
+ * negative values on failure.
  *
  */
-int cipso_v4_sock_setattr(struct sock *sk,
-			  const struct cipso_v4_doi *doi_def,
-			  const struct netlbl_lsm_secattr *secattr)
+static int cipso_v4_genopt(unsigned char *buf, u32 buf_len,
+			   const struct cipso_v4_doi *doi_def,
+			   const struct netlbl_lsm_secattr *secattr)
 {
-	int ret_val = -EPERM;
+	int ret_val;
 	u32 iter;
-	unsigned char *buf;
-	u32 buf_len = 0;
-	u32 opt_len;
-	struct ip_options *opt = NULL;
-	struct inet_sock *sk_inet;
-	struct inet_connection_sock *sk_conn;
-
-	/* In the case of sock_create_lite(), the sock->sk field is not
-	 * defined yet but it is not a problem as the only users of these
-	 * "lite" PF_INET sockets are functions which do an accept() call
-	 * afterwards so we will label the socket as part of the accept(). */
-	if (sk == NULL)
-		return 0;
 
-	/* We allocate the maximum CIPSO option size here so we are probably
-	 * being a little wasteful, but it makes our life _much_ easier later
-	 * on and after all we are only talking about 40 bytes. */
-	buf_len = CIPSO_V4_OPT_LEN_MAX;
-	buf = kmalloc(buf_len, GFP_ATOMIC);
-	if (buf == NULL) {
-		ret_val = -ENOMEM;
-		goto socket_setattr_failure;
-	}
+	if (buf_len <= CIPSO_V4_HDR_LEN)
+		return -ENOSPC;
 
 	/* XXX - This code assumes only one tag per CIPSO option which isn't
 	 * really a good assumption to make but since we only support the MAC
@@ -1734,8 +1713,7 @@ int cipso_v4_sock_setattr(struct sock *sk,
 						   buf_len - CIPSO_V4_HDR_LEN);
 			break;
 		default:
-			ret_val = -EPERM;
-			goto socket_setattr_failure;
+			return -EPERM;
 		}
 
 		iter++;
@@ -1743,9 +1721,58 @@ int cipso_v4_sock_setattr(struct sock *sk,
 		 iter < CIPSO_V4_TAG_MAXCNT &&
 		 doi_def->tags[iter] != CIPSO_V4_TAG_INVALID);
 	if (ret_val < 0)
-		goto socket_setattr_failure;
+		return ret_val;
 	cipso_v4_gentag_hdr(doi_def, buf, ret_val);
-	buf_len = CIPSO_V4_HDR_LEN + ret_val;
+	return CIPSO_V4_HDR_LEN + ret_val;
+}
+
+/**
+ * cipso_v4_sock_setattr - Add a CIPSO option to a socket
+ * @sk: the socket
+ * @doi_def: the CIPSO DOI to use
+ * @secattr: the specific security attributes of the socket
+ *
+ * Description:
+ * Set the CIPSO option on the given socket using the DOI definition and
+ * security attributes passed to the function.  This function requires
+ * exclusive access to @sk, which means it either needs to be in the
+ * process of being created or locked.  Returns zero on success and negative
+ * values on failure.
+ *
+ */
+int cipso_v4_sock_setattr(struct sock *sk,
+			  const struct cipso_v4_doi *doi_def,
+			  const struct netlbl_lsm_secattr *secattr)
+{
+	int ret_val = -EPERM;
+	unsigned char *buf = NULL;
+	u32 buf_len;
+	u32 opt_len;
+	struct ip_options *opt = NULL;
+	struct inet_sock *sk_inet;
+	struct inet_connection_sock *sk_conn;
+
+	/* In the case of sock_create_lite(), the sock->sk field is not
+	 * defined yet but it is not a problem as the only users of these
+	 * "lite" PF_INET sockets are functions which do an accept() call
+	 * afterwards so we will label the socket as part of the accept(). */
+	if (sk == NULL)
+		return 0;
+
+	/* We allocate the maximum CIPSO option size here so we are probably
+	 * being a little wasteful, but it makes our life _much_ easier later
+	 * on and after all we are only talking about 40 bytes. */
+	buf_len = CIPSO_V4_OPT_LEN_MAX;
+	buf = kmalloc(buf_len, GFP_ATOMIC);
+	if (buf == NULL) {
+		ret_val = -ENOMEM;
+		goto socket_setattr_failure;
+	}
+
+	ret_val = cipso_v4_genopt(buf, buf_len, doi_def, secattr);
+	if (ret_val < 0)
+		goto socket_setattr_failure;
+	buf_len = ret_val;
 
 	/* We can't use ip_options_get() directly because it makes a call to
 	 * ip_options_get_alloc() which allocates memory with GFP_KERNEL and
@@ -1853,6 +1880,123 @@ int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr)
 				secattr);
 }
 
+/**
+ * cipso_v4_skbuff_setattr - Set the CIPSO option on a packet
+ * @skb: the packet
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Set the CIPSO option on the given packet based on the security attributes.
+ * Returns a pointer to the IP header on success and NULL on failure.
+ *
+ */
+int cipso_v4_skbuff_setattr(struct sk_buff *skb,
+			    const struct cipso_v4_doi *doi_def,
+			    const struct netlbl_lsm_secattr *secattr)
+{
+	int ret_val;
+	struct iphdr *iph;
+	struct ip_options *opt = &IPCB(skb)->opt;
+	unsigned char buf[CIPSO_V4_OPT_LEN_MAX];
+	u32 buf_len = CIPSO_V4_OPT_LEN_MAX;
+	u32 opt_len;
+	int len_delta;
+
+	buf_len = cipso_v4_genopt(buf, buf_len, doi_def, secattr);
+	if (buf_len < 0)
+		return buf_len;
+	opt_len = (buf_len + 3) & ~3;
+
+	/* we overwrite any existing options to ensure that we have enough
+	 * room for the CIPSO option, the reason is that we _need_ to guarantee
+	 * that the security label is applied to the packet - we do the same
+	 * thing when using the socket options and it hasn't caused a problem,
+	 * if we need to we can always revisit this choice later */
+
+	len_delta = opt_len - opt->optlen;
+	/* if we don't ensure enough headroom we could panic on the skb_push()
+	 * call below so make sure we have enough, we are also "mangling" the
+	 * packet so we should probably do a copy-on-write call anyway */
+	ret_val = skb_cow(skb, skb_headroom(skb) + len_delta);
+	if (ret_val < 0)
+		return ret_val;
+
+	if (len_delta > 0) {
+		/* we assume that the header + opt->optlen have already been
+		 * "pushed" in ip_options_build() or similar */
+		iph = ip_hdr(skb);
+		skb_push(skb, len_delta);
+		memmove((char *)iph - len_delta, iph, iph->ihl << 2);
+		skb_reset_network_header(skb);
+		iph = ip_hdr(skb);
+	} else if (len_delta < 0) {
+		iph = ip_hdr(skb);
+		memset(iph + 1, IPOPT_NOP, opt->optlen);
+	} else
+		iph = ip_hdr(skb);
+
+	if (opt->optlen > 0)
+		memset(opt, 0, sizeof(*opt));
+	opt->optlen = opt_len;
+	opt->cipso = sizeof(struct iphdr);
+	opt->is_changed = 1;
+
+	/* we have to do the following because we are being called from a
+	 * netfilter hook which means the packet already has had the header
+	 * fields populated and the checksum calculated - yes this means we
+	 * are doing more work than needed but we do it to keep the core
+	 * stack clean and tidy */
+	memcpy(iph + 1, buf, buf_len);
+	if (opt_len > buf_len)
+		memset((char *)(iph + 1) + buf_len, 0, opt_len - buf_len);
+	if (len_delta != 0) {
+		iph->ihl = 5 + (opt_len >> 2);
+		iph->tot_len = htons(skb->len);
+	}
+	ip_send_check(iph);
+
+	return 0;
+}
+
+/**
+ * cipso_v4_skbuff_delattr - Delete any CIPSO options from a packet
+ * @skb: the packet
+ *
+ * Description:
+ * Removes any and all CIPSO options from the given packet.  Returns zero on
+ * success, negative values on failure.
+ *
+ */
+int cipso_v4_skbuff_delattr(struct sk_buff *skb)
+{
+	int ret_val;
+	struct iphdr *iph;
+	struct ip_options *opt = &IPCB(skb)->opt;
+	unsigned char *cipso_ptr;
+
+	if (opt->cipso == 0)
+		return 0;
+
+	/* since we are changing the packet we should make a copy */
+	ret_val = skb_cow(skb, skb_headroom(skb));
+	if (ret_val < 0)
+		return ret_val;
+
+	/* the easiest thing to do is just replace the cipso option with noop
+	 * options since we don't change the size of the packet, although we
+	 * still need to recalculate the checksum */
+
+	iph = ip_hdr(skb);
+	cipso_ptr = (unsigned char *)iph + opt->cipso;
+	memset(cipso_ptr, IPOPT_NOOP, cipso_ptr[1]);
+	opt->cipso = 0;
+	opt->is_changed = 1;
+
+	ip_send_check(iph);
+
+	return 0;
+}
+
 /**
  * cipso_v4_skbuff_getattr - Get the security attributes from the CIPSO option
  * @skb: the packet
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 8b820dc98060..cc8047d1f505 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -472,6 +472,66 @@ int netlbl_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr)
 	return cipso_v4_sock_getattr(sk, secattr);
 }
 
+/**
+ * netlbl_skbuff_setattr - Label a packet using the correct protocol
+ * @skb: the packet
+ * @family: protocol family
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Attach the correct label to the given packet using the security attributes
+ * specified in @secattr.  Returns zero on success, negative values on failure.
+ *
+ */
+int netlbl_skbuff_setattr(struct sk_buff *skb,
+			  u16 family,
+			  const struct netlbl_lsm_secattr *secattr)
+{
+	int ret_val;
+	struct iphdr *hdr4;
+	struct netlbl_domaddr4_map *af4_entry;
+
+	rcu_read_lock();
+	switch (family) {
+	case AF_INET:
+		hdr4 = ip_hdr(skb);
+		af4_entry = netlbl_domhsh_getentry_af4(secattr->domain,
+						       hdr4->daddr);
+		if (af4_entry == NULL) {
+			ret_val = -ENOENT;
+			goto skbuff_setattr_return;
+		}
+		switch (af4_entry->type) {
+		case NETLBL_NLTYPE_CIPSOV4:
+			ret_val = cipso_v4_skbuff_setattr(skb,
+						   af4_entry->type_def.cipsov4,
+						   secattr);
+			break;
+		case NETLBL_NLTYPE_UNLABELED:
+			/* just delete the protocols we support for right now
+			 * but we could remove other protocols if needed */
+			ret_val = cipso_v4_skbuff_delattr(skb);
+			break;
+		default:
+			ret_val = -ENOENT;
+		}
+		break;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	case AF_INET6:
+		/* since we don't support any IPv6 labeling protocols right
+		 * now we can optimize everything away until we do */
+		ret_val = 0;
+		break;
+#endif /* IPv6 */
+	default:
+		ret_val = 0;
+	}
+
+skbuff_setattr_return:
+	rcu_read_unlock();
+	return ret_val;
+}
+
 /**
  * netlbl_skbuff_getattr - Determine the security attributes of a packet
  * @skb: the packet
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index a91146a6b37d..7432bdd5d367 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -4407,13 +4407,15 @@ static unsigned int selinux_ip_forward(struct sk_buff *skb, int ifindex,
 	u32 peer_sid;
 	struct avc_audit_data ad;
 	u8 secmark_active;
+	u8 netlbl_active;
 	u8 peerlbl_active;
 
 	if (!selinux_policycap_netpeer)
 		return NF_ACCEPT;
 
 	secmark_active = selinux_secmark_enabled();
-	peerlbl_active = netlbl_enabled() || selinux_xfrm_enabled();
+	netlbl_active = netlbl_enabled();
+	peerlbl_active = netlbl_active || selinux_xfrm_enabled();
 	if (!secmark_active && !peerlbl_active)
 		return NF_ACCEPT;
 
@@ -4440,6 +4442,14 @@ static unsigned int selinux_ip_forward(struct sk_buff *skb, int ifindex,
 				 SECCLASS_PACKET, PACKET__FORWARD_IN, &ad))
 			return NF_DROP;
 
+	if (netlbl_active)
+		/* we do this in the FORWARD path and not the POST_ROUTING
+		 * path because we want to make sure we apply the necessary
+		 * labeling before IPsec is applied so we can leverage AH
+		 * protection */
+		if (selinux_netlbl_skbuff_setsid(skb, family, peer_sid) != 0)
+			return NF_DROP;
+
 	return NF_ACCEPT;
 }
 
@@ -4463,6 +4473,37 @@ static unsigned int selinux_ipv6_forward(unsigned int hooknum,
 }
 #endif	/* IPV6 */
 
+static unsigned int selinux_ip_output(struct sk_buff *skb,
+				      u16 family)
+{
+	u32 sid;
+
+	if (!netlbl_enabled())
+		return NF_ACCEPT;
+
+	/* we do this in the LOCAL_OUT path and not the POST_ROUTING path
+	 * because we want to make sure we apply the necessary labeling
+	 * before IPsec is applied so we can leverage AH protection */
+	if (skb->sk) {
+		struct sk_security_struct *sksec = skb->sk->sk_security;
+		sid = sksec->sid;
+	} else
+		sid = SECINITSID_KERNEL;
+	if (selinux_netlbl_skbuff_setsid(skb, family, sid) != 0)
+		return NF_DROP;
+
+	return NF_ACCEPT;
+}
+
+static unsigned int selinux_ipv4_output(unsigned int hooknum,
+					struct sk_buff *skb,
+					const struct net_device *in,
+					const struct net_device *out,
+					int (*okfn)(struct sk_buff *))
+{
+	return selinux_ip_output(skb, PF_INET);
+}
+
 static int selinux_ip_postroute_iptables_compat(struct sock *sk,
 						int ifindex,
 						struct avc_audit_data *ad,
@@ -5700,6 +5741,13 @@ static struct nf_hook_ops selinux_ipv4_ops[] = {
 		.pf =		PF_INET,
 		.hooknum =	NF_INET_FORWARD,
 		.priority =	NF_IP_PRI_SELINUX_FIRST,
+	},
+	{
+		.hook =		selinux_ipv4_output,
+		.owner =	THIS_MODULE,
+		.pf =		PF_INET,
+		.hooknum =	NF_INET_LOCAL_OUT,
+		.priority =	NF_IP_PRI_SELINUX_FIRST,
 	}
 };
 
diff --git a/security/selinux/include/netlabel.h b/security/selinux/include/netlabel.h
index d4e3ac8a7fbf..b3e6ae071fc3 100644
--- a/security/selinux/include/netlabel.h
+++ b/security/selinux/include/netlabel.h
@@ -48,6 +48,9 @@ int selinux_netlbl_skbuff_getsid(struct sk_buff *skb,
 				 u16 family,
 				 u32 *type,
 				 u32 *sid);
+int selinux_netlbl_skbuff_setsid(struct sk_buff *skb,
+				 u16 family,
+				 u32 sid);
 
 void selinux_netlbl_sock_graft(struct sock *sk, struct socket *sock);
 int selinux_netlbl_socket_post_create(struct socket *sock);
@@ -88,6 +91,12 @@ static inline int selinux_netlbl_skbuff_getsid(struct sk_buff *skb,
 	*sid = SECSID_NULL;
 	return 0;
 }
+static inline int selinux_netlbl_skbuff_setsid(struct sk_buff *skb,
+					       u16 family,
+					       u32 sid)
+{
+	return 0;
+}
 
 static inline void selinux_netlbl_sock_graft(struct sock *sk,
 					     struct socket *sock)
diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h
index 91070ab874ce..f46dd1c3d01c 100644
--- a/security/selinux/include/objsec.h
+++ b/security/selinux/include/objsec.h
@@ -117,6 +117,7 @@ struct sk_security_struct {
 		NLBL_UNSET = 0,
 		NLBL_REQUIRE,
 		NLBL_LABELED,
+		NLBL_REQSKB,
 	} nlbl_state;
 #endif
 };
diff --git a/security/selinux/netlabel.c b/security/selinux/netlabel.c
index 4053f7fc95fb..090404d6e512 100644
--- a/security/selinux/netlabel.c
+++ b/security/selinux/netlabel.c
@@ -9,7 +9,7 @@
  */
 
 /*
- * (c) Copyright Hewlett-Packard Development Company, L.P., 2007
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2007, 2008
  *
  * This program is free software;  you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -31,6 +31,8 @@
 #include <linux/rcupdate.h>
 #include <net/sock.h>
 #include <net/netlabel.h>
+#include <net/inet_sock.h>
+#include <net/inet_connection_sock.h>
 
 #include "objsec.h"
 #include "security.h"
@@ -77,6 +79,8 @@ static int selinux_netlbl_sock_setsid(struct sock *sk)
 	int rc;
 	struct sk_security_struct *sksec = sk->sk_security;
 	struct netlbl_lsm_secattr secattr;
+	struct inet_sock *sk_inet;
+	struct inet_connection_sock *sk_conn;
 
 	if (sksec->nlbl_state != NLBL_REQUIRE)
 		return 0;
@@ -87,8 +91,29 @@ static int selinux_netlbl_sock_setsid(struct sock *sk)
 	if (rc != 0)
 		goto sock_setsid_return;
 	rc = netlbl_sock_setattr(sk, &secattr);
-	if (rc == 0)
+	switch (rc) {
+	case 0:
 		sksec->nlbl_state = NLBL_LABELED;
+		break;
+	case -EDESTADDRREQ:
+		/* we are going to possibly end up labeling the individual
+		 * packets later which is problematic for stream sockets
+		 * because of the additional IP header size, our solution is to
+		 * allow for the maximum IP header length (40 bytes for IPv4,
+		 * we don't have to worry about IPv6 yet) just in case */
+		sk_inet = inet_sk(sk);
+		if (sk_inet->is_icsk) {
+			sk_conn = inet_csk(sk);
+			if (sk_inet->opt)
+				sk_conn->icsk_ext_hdr_len -=
+							   sk_inet->opt->optlen;
+			sk_conn->icsk_ext_hdr_len += 40;
+			sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie);
+		}
+		sksec->nlbl_state = NLBL_REQSKB;
+		rc = 0;
+		break;
+	}
 
 sock_setsid_return:
 	netlbl_secattr_destroy(&secattr);
@@ -182,6 +207,45 @@ int selinux_netlbl_skbuff_getsid(struct sk_buff *skb,
 	return rc;
 }
 
+/**
+ * selinux_netlbl_skbuff_setsid - Set the NetLabel on a packet given a sid
+ * @skb: the packet
+ * @family: protocol family
+ * @sid: the SID
+ *
+ * Description
+ * Call the NetLabel mechanism to set the label of a packet using @sid.
+ * Returns zero on auccess, negative values on failure.
+ *
+ */
+int selinux_netlbl_skbuff_setsid(struct sk_buff *skb,
+				 u16 family,
+				 u32 sid)
+{
+	int rc;
+	struct netlbl_lsm_secattr secattr;
+	struct sock *sk;
+
+	/* if this is a locally generated packet check to see if it is already
+	 * being labeled by it's parent socket, if it is just exit */
+	sk = skb->sk;
+	if (sk != NULL) {
+		struct sk_security_struct *sksec = sk->sk_security;
+		if (sksec->nlbl_state != NLBL_REQSKB)
+			return 0;
+	}
+
+	netlbl_secattr_init(&secattr);
+	rc = security_netlbl_sid_to_secattr(sid, &secattr);
+	if (rc != 0)
+		goto skbuff_setsid_return;
+	rc = netlbl_skbuff_setattr(skb, family, &secattr);
+
+skbuff_setsid_return:
+	netlbl_secattr_destroy(&secattr);
+	return rc;
+}
+
 /**
  * selinux_netlbl_sock_graft - Netlabel the new socket
  * @sk: the new connection
-- 
cgit v1.2.3


From 014ab19a69c325f52d7bae54ceeda73d6307ae0c Mon Sep 17 00:00:00 2001
From: Paul Moore <paul.moore@hp.com>
Date: Fri, 10 Oct 2008 10:16:33 -0400
Subject: selinux: Set socket NetLabel based on connection endpoint

Previous work enabled the use of address based NetLabel selectors, which while
highly useful, brought the potential for additional per-packet overhead when
used.  This patch attempts to solve that by applying NetLabel socket labels
when sockets are connect()'d.  This should alleviate the per-packet NetLabel
labeling for all connected sockets (yes, it even works for connected DGRAM
sockets).

Signed-off-by: Paul Moore <paul.moore@hp.com>
Reviewed-by: James Morris <jmorris@namei.org>
---
 include/net/cipso_ipv4.h            |   5 ++
 include/net/netlabel.h              |  13 ++++
 net/ipv4/cipso_ipv4.c               |  74 ++++++++++++++++++
 net/netlabel/netlabel_kapi.c        |  78 ++++++++++++++++++-
 security/selinux/hooks.c            |  11 +--
 security/selinux/include/netlabel.h |  19 ++++-
 security/selinux/include/objsec.h   |   1 +
 security/selinux/netlabel.c         | 147 +++++++++++++++++++++++++++++-------
 8 files changed, 311 insertions(+), 37 deletions(-)

(limited to 'security')

diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h
index 2ce093ba553d..811febf97caf 100644
--- a/include/net/cipso_ipv4.h
+++ b/include/net/cipso_ipv4.h
@@ -207,6 +207,7 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway);
 int cipso_v4_sock_setattr(struct sock *sk,
 			  const struct cipso_v4_doi *doi_def,
 			  const struct netlbl_lsm_secattr *secattr);
+void cipso_v4_sock_delattr(struct sock *sk);
 int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr);
 int cipso_v4_skbuff_setattr(struct sk_buff *skb,
 			    const struct cipso_v4_doi *doi_def,
@@ -230,6 +231,10 @@ static inline int cipso_v4_sock_setattr(struct sock *sk,
 	return -ENOSYS;
 }
 
+static inline void cipso_v4_sock_delattr(struct sock *sk)
+{
+}
+
 static inline int cipso_v4_sock_getattr(struct sock *sk,
 					struct netlbl_lsm_secattr *secattr)
 {
diff --git a/include/net/netlabel.h b/include/net/netlabel.h
index 3f67e6d49e40..074cad40ac66 100644
--- a/include/net/netlabel.h
+++ b/include/net/netlabel.h
@@ -380,8 +380,12 @@ int netlbl_secattr_catmap_setrng(struct netlbl_lsm_secattr_catmap *catmap,
 int netlbl_enabled(void);
 int netlbl_sock_setattr(struct sock *sk,
 			const struct netlbl_lsm_secattr *secattr);
+void netlbl_sock_delattr(struct sock *sk);
 int netlbl_sock_getattr(struct sock *sk,
 			struct netlbl_lsm_secattr *secattr);
+int netlbl_conn_setattr(struct sock *sk,
+			struct sockaddr *addr,
+			const struct netlbl_lsm_secattr *secattr);
 int netlbl_skbuff_setattr(struct sk_buff *skb,
 			  u16 family,
 			  const struct netlbl_lsm_secattr *secattr);
@@ -449,11 +453,20 @@ static inline int netlbl_sock_setattr(struct sock *sk,
 {
 	return -ENOSYS;
 }
+static inline void netlbl_sock_delattr(struct sock *sk)
+{
+}
 static inline int netlbl_sock_getattr(struct sock *sk,
 				      struct netlbl_lsm_secattr *secattr)
 {
 	return -ENOSYS;
 }
+static inline int netlbl_conn_setattr(struct sock *sk,
+				      struct sockaddr *addr,
+				      const struct netlbl_lsm_secattr *secattr)
+{
+	return -ENOSYS;
+}
 static inline int netlbl_skbuff_setattr(struct sk_buff *skb,
 				      u16 family,
 				      const struct netlbl_lsm_secattr *secattr)
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index e13d6dbb66ab..23768b9d6b64 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1809,6 +1809,80 @@ socket_setattr_failure:
 	return ret_val;
 }
 
+/**
+ * cipso_v4_sock_delattr - Delete the CIPSO option from a socket
+ * @sk: the socket
+ *
+ * Description:
+ * Removes the CIPSO option from a socket, if present.
+ *
+ */
+void cipso_v4_sock_delattr(struct sock *sk)
+{
+	u8 hdr_delta;
+	struct ip_options *opt;
+	struct inet_sock *sk_inet;
+
+	sk_inet = inet_sk(sk);
+	opt = sk_inet->opt;
+	if (opt == NULL || opt->cipso == 0)
+		return;
+
+	if (opt->srr || opt->rr || opt->ts || opt->router_alert) {
+		u8 cipso_len;
+		u8 cipso_off;
+		unsigned char *cipso_ptr;
+		int iter;
+		int optlen_new;
+
+		cipso_off = opt->cipso - sizeof(struct iphdr);
+		cipso_ptr = &opt->__data[cipso_off];
+		cipso_len = cipso_ptr[1];
+
+		if (opt->srr > opt->cipso)
+			opt->srr -= cipso_len;
+		if (opt->rr > opt->cipso)
+			opt->rr -= cipso_len;
+		if (opt->ts > opt->cipso)
+			opt->ts -= cipso_len;
+		if (opt->router_alert > opt->cipso)
+			opt->router_alert -= cipso_len;
+		opt->cipso = 0;
+
+		memmove(cipso_ptr, cipso_ptr + cipso_len,
+			opt->optlen - cipso_off - cipso_len);
+
+		/* determining the new total option length is tricky because of
+		 * the padding necessary, the only thing i can think to do at
+		 * this point is walk the options one-by-one, skipping the
+		 * padding at the end to determine the actual option size and
+		 * from there we can determine the new total option length */
+		iter = 0;
+		optlen_new = 0;
+		while (iter < opt->optlen)
+			if (opt->__data[iter] != IPOPT_NOP) {
+				iter += opt->__data[iter + 1];
+				optlen_new = iter;
+			} else
+				iter++;
+		hdr_delta = opt->optlen;
+		opt->optlen = (optlen_new + 3) & ~3;
+		hdr_delta -= opt->optlen;
+	} else {
+		/* only the cipso option was present on the socket so we can
+		 * remove the entire option struct */
+		sk_inet->opt = NULL;
+		hdr_delta = opt->optlen;
+		kfree(opt);
+	}
+
+	if (sk_inet->is_icsk && hdr_delta > 0) {
+		struct inet_connection_sock *sk_conn = inet_csk(sk);
+		sk_conn->icsk_ext_hdr_len -= hdr_delta;
+		sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie);
+	}
+}
+
 /**
  * cipso_v4_getattr - Helper function for the cipso_v4_*_getattr functions
  * @cipso: the CIPSO v4 option
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index cc8047d1f505..78fc557689b2 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -10,7 +10,7 @@
  */
 
 /*
- * (c) Copyright Hewlett-Packard Development Company, L.P., 2006
+ * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008
  *
  * This program is free software;  you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -455,6 +455,20 @@ socket_setattr_return:
 	return ret_val;
 }
 
+/**
+ * netlbl_sock_delattr - Delete all the NetLabel labels on a socket
+ * @sk: the socket
+ *
+ * Description:
+ * Remove all the NetLabel labeling from @sk.  The caller is responsible for
+ * ensuring that @sk is locked.
+ *
+ */
+void netlbl_sock_delattr(struct sock *sk)
+{
+	cipso_v4_sock_delattr(sk);
+}
+
 /**
  * netlbl_sock_getattr - Determine the security attributes of a sock
  * @sk: the sock
@@ -472,6 +486,68 @@ int netlbl_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr)
 	return cipso_v4_sock_getattr(sk, secattr);
 }
 
+/**
+ * netlbl_conn_setattr - Label a connected socket using the correct protocol
+ * @sk: the socket to label
+ * @addr: the destination address
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Attach the correct label to the given connected socket using the security
+ * attributes specified in @secattr.  The caller is responsible for ensuring
+ * that @sk is locked.  Returns zero on success, negative values on failure.
+ *
+ */
+int netlbl_conn_setattr(struct sock *sk,
+			struct sockaddr *addr,
+			const struct netlbl_lsm_secattr *secattr)
+{
+	int ret_val;
+	struct sockaddr_in *addr4;
+	struct netlbl_domaddr4_map *af4_entry;
+
+	rcu_read_lock();
+	switch (addr->sa_family) {
+	case AF_INET:
+		addr4 = (struct sockaddr_in *)addr;
+		af4_entry = netlbl_domhsh_getentry_af4(secattr->domain,
+						       addr4->sin_addr.s_addr);
+		if (af4_entry == NULL) {
+			ret_val = -ENOENT;
+			goto conn_setattr_return;
+		}
+		switch (af4_entry->type) {
+		case NETLBL_NLTYPE_CIPSOV4:
+			ret_val = cipso_v4_sock_setattr(sk,
+						   af4_entry->type_def.cipsov4,
+						   secattr);
+			break;
+		case NETLBL_NLTYPE_UNLABELED:
+			/* just delete the protocols we support for right now
+			 * but we could remove other protocols if needed */
+			cipso_v4_sock_delattr(sk);
+			ret_val = 0;
+			break;
+		default:
+			ret_val = -ENOENT;
+		}
+		break;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	case AF_INET6:
+		/* since we don't support any IPv6 labeling protocols right
+		 * now we can optimize everything away until we do */
+		ret_val = 0;
+		break;
+#endif /* IPv6 */
+	default:
+		ret_val = 0;
+	}
+
+conn_setattr_return:
+	rcu_read_unlock();
+	return ret_val;
+}
+
 /**
  * netlbl_skbuff_setattr - Label a packet using the correct protocol
  * @skb: the packet
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 7432bdd5d367..632ac3e80a61 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -3794,6 +3794,7 @@ out:
 
 static int selinux_socket_connect(struct socket *sock, struct sockaddr *address, int addrlen)
 {
+	struct sock *sk = sock->sk;
 	struct inode_security_struct *isec;
 	int err;
 
@@ -3807,7 +3808,6 @@ static int selinux_socket_connect(struct socket *sock, struct sockaddr *address,
 	isec = SOCK_INODE(sock)->i_security;
 	if (isec->sclass == SECCLASS_TCP_SOCKET ||
 	    isec->sclass == SECCLASS_DCCP_SOCKET) {
-		struct sock *sk = sock->sk;
 		struct avc_audit_data ad;
 		struct sockaddr_in *addr4 = NULL;
 		struct sockaddr_in6 *addr6 = NULL;
@@ -3841,6 +3841,8 @@ static int selinux_socket_connect(struct socket *sock, struct sockaddr *address,
 			goto out;
 	}
 
+	err = selinux_netlbl_socket_connect(sk, address);
+
 out:
 	return err;
 }
@@ -4290,8 +4292,6 @@ static void selinux_sock_graft(struct sock *sk, struct socket *parent)
 	    sk->sk_family == PF_UNIX)
 		isec->sid = sksec->sid;
 	sksec->sclass = isec->sclass;
-
-	selinux_netlbl_sock_graft(sk, parent);
 }
 
 static int selinux_inet_conn_request(struct sock *sk, struct sk_buff *skb,
@@ -4342,8 +4342,7 @@ static void selinux_inet_csk_clone(struct sock *newsk,
 	selinux_netlbl_sk_security_reset(newsksec, req->rsk_ops->family);
 }
 
-static void selinux_inet_conn_established(struct sock *sk,
-				struct sk_buff *skb)
+static void selinux_inet_conn_established(struct sock *sk, struct sk_buff *skb)
 {
 	u16 family = sk->sk_family;
 	struct sk_security_struct *sksec = sk->sk_security;
@@ -4353,6 +4352,8 @@ static void selinux_inet_conn_established(struct sock *sk,
 		family = PF_INET;
 
 	selinux_skb_peerlbl_sid(skb, family, &sksec->peer_sid);
+
+	selinux_netlbl_inet_conn_established(sk, family);
 }
 
 static void selinux_req_classify_flow(const struct request_sock *req,
diff --git a/security/selinux/include/netlabel.h b/security/selinux/include/netlabel.h
index b3e6ae071fc3..982bac0ac328 100644
--- a/security/selinux/include/netlabel.h
+++ b/security/selinux/include/netlabel.h
@@ -52,7 +52,7 @@ int selinux_netlbl_skbuff_setsid(struct sk_buff *skb,
 				 u16 family,
 				 u32 sid);
 
-void selinux_netlbl_sock_graft(struct sock *sk, struct socket *sock);
+void selinux_netlbl_inet_conn_established(struct sock *sk, u16 family);
 int selinux_netlbl_socket_post_create(struct socket *sock);
 int selinux_netlbl_inode_permission(struct inode *inode, int mask);
 int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec,
@@ -62,6 +62,8 @@ int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec,
 int selinux_netlbl_socket_setsockopt(struct socket *sock,
 				     int level,
 				     int optname);
+int selinux_netlbl_socket_connect(struct sock *sk, struct sockaddr *addr);
+
 #else
 static inline void selinux_netlbl_cache_invalidate(void)
 {
@@ -98,8 +100,14 @@ static inline int selinux_netlbl_skbuff_setsid(struct sk_buff *skb,
 	return 0;
 }
 
-static inline void selinux_netlbl_sock_graft(struct sock *sk,
-					     struct socket *sock)
+static inline int selinux_netlbl_conn_setsid(struct sock *sk,
+					     struct sockaddr *addr)
+{
+	return 0;
+}
+
+static inline void selinux_netlbl_inet_conn_established(struct sock *sk,
+							u16 family)
 {
 	return;
 }
@@ -125,6 +133,11 @@ static inline int selinux_netlbl_socket_setsockopt(struct socket *sock,
 {
 	return 0;
 }
+static inline int selinux_netlbl_socket_connect(struct sock *sk,
+						struct sockaddr *addr)
+{
+	return 0;
+}
 #endif /* CONFIG_NETLABEL */
 
 #endif
diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h
index f46dd1c3d01c..ad34787c6c02 100644
--- a/security/selinux/include/objsec.h
+++ b/security/selinux/include/objsec.h
@@ -118,6 +118,7 @@ struct sk_security_struct {
 		NLBL_REQUIRE,
 		NLBL_LABELED,
 		NLBL_REQSKB,
+		NLBL_CONNLABELED,
 	} nlbl_state;
 #endif
 };
diff --git a/security/selinux/netlabel.c b/security/selinux/netlabel.c
index 090404d6e512..b22b7dafa0e3 100644
--- a/security/selinux/netlabel.c
+++ b/security/selinux/netlabel.c
@@ -29,10 +29,12 @@
 
 #include <linux/spinlock.h>
 #include <linux/rcupdate.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
 #include <net/sock.h>
 #include <net/netlabel.h>
-#include <net/inet_sock.h>
-#include <net/inet_connection_sock.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
 
 #include "objsec.h"
 #include "security.h"
@@ -79,8 +81,6 @@ static int selinux_netlbl_sock_setsid(struct sock *sk)
 	int rc;
 	struct sk_security_struct *sksec = sk->sk_security;
 	struct netlbl_lsm_secattr secattr;
-	struct inet_sock *sk_inet;
-	struct inet_connection_sock *sk_conn;
 
 	if (sksec->nlbl_state != NLBL_REQUIRE)
 		return 0;
@@ -96,20 +96,6 @@ static int selinux_netlbl_sock_setsid(struct sock *sk)
 		sksec->nlbl_state = NLBL_LABELED;
 		break;
 	case -EDESTADDRREQ:
-		/* we are going to possibly end up labeling the individual
-		 * packets later which is problematic for stream sockets
-		 * because of the additional IP header size, our solution is to
-		 * allow for the maximum IP header length (40 bytes for IPv4,
-		 * we don't have to worry about IPv6 yet) just in case */
-		sk_inet = inet_sk(sk);
-		if (sk_inet->is_icsk) {
-			sk_conn = inet_csk(sk);
-			if (sk_inet->opt)
-				sk_conn->icsk_ext_hdr_len -=
-							   sk_inet->opt->optlen;
-			sk_conn->icsk_ext_hdr_len += 40;
-			sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie);
-		}
 		sksec->nlbl_state = NLBL_REQSKB;
 		rc = 0;
 		break;
@@ -247,21 +233,77 @@ skbuff_setsid_return:
 }
 
 /**
- * selinux_netlbl_sock_graft - Netlabel the new socket
+ * selinux_netlbl_inet_conn_established - Netlabel the newly accepted connection
  * @sk: the new connection
- * @sock: the new socket
  *
  * Description:
- * The connection represented by @sk is being grafted onto @sock so set the
- * socket's NetLabel to match the SID of @sk.
+ * A new connection has been established on @sk so make sure it is labeled
+ * correctly with the NetLabel susbsystem.
  *
  */
-void selinux_netlbl_sock_graft(struct sock *sk, struct socket *sock)
+void selinux_netlbl_inet_conn_established(struct sock *sk, u16 family)
 {
-	/* Try to set the NetLabel on the socket to save time later, if we fail
-	 * here we will pick up the pieces in later calls to
-	 * selinux_netlbl_inode_permission(). */
-	selinux_netlbl_sock_setsid(sk);
+	int rc;
+	struct sk_security_struct *sksec = sk->sk_security;
+	struct netlbl_lsm_secattr secattr;
+	struct inet_sock *sk_inet = inet_sk(sk);
+	struct sockaddr_in addr;
+
+	if (sksec->nlbl_state != NLBL_REQUIRE)
+		return;
+
+	netlbl_secattr_init(&secattr);
+	if (security_netlbl_sid_to_secattr(sksec->sid, &secattr) != 0)
+		goto inet_conn_established_return;
+
+	rc = netlbl_sock_setattr(sk, &secattr);
+	switch (rc) {
+	case 0:
+		sksec->nlbl_state = NLBL_LABELED;
+		break;
+	case -EDESTADDRREQ:
+		/* no PF_INET6 support yet because we don't support any IPv6
+		 * labeling protocols */
+		if (family != PF_INET) {
+			sksec->nlbl_state = NLBL_UNSET;
+			goto inet_conn_established_return;
+		}
+
+		addr.sin_family = family;
+		addr.sin_addr.s_addr = sk_inet->daddr;
+		if (netlbl_conn_setattr(sk, (struct sockaddr *)&addr,
+					&secattr) != 0) {
+			/* we failed to label the connected socket (could be
+			 * for a variety of reasons, the actual "why" isn't
+			 * important here) so we have to go to our backup plan,
+			 * labeling the packets individually in the netfilter
+			 * local output hook.  this is okay but we need to
+			 * adjust the MSS of the connection to take into
+			 * account any labeling overhead, since we don't know
+			 * the exact overhead at this point we'll use the worst
+			 * case value which is 40 bytes for IPv4 */
+			struct inet_connection_sock *sk_conn = inet_csk(sk);
+			sk_conn->icsk_ext_hdr_len += 40 -
+				      (sk_inet->opt ? sk_inet->opt->optlen : 0);
+			sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie);
+
+			sksec->nlbl_state = NLBL_REQSKB;
+		} else
+			sksec->nlbl_state = NLBL_CONNLABELED;
+		break;
+	default:
+		/* note that we are failing to label the socket which could be
+		 * a bad thing since it means traffic could leave the system
+		 * without the desired labeling, however, all is not lost as
+		 * we have a check in selinux_netlbl_inode_permission() to
+		 * pick up the pieces that we might drop here because we can't
+		 * return an error code */
+		break;
+	}
+
+inet_conn_established_return:
+	netlbl_secattr_destroy(&secattr);
+	return;
 }
 
 /**
@@ -398,7 +440,8 @@ int selinux_netlbl_socket_setsockopt(struct socket *sock,
 	struct netlbl_lsm_secattr secattr;
 
 	if (level == IPPROTO_IP && optname == IP_OPTIONS &&
-	    sksec->nlbl_state == NLBL_LABELED) {
+	    (sksec->nlbl_state == NLBL_LABELED ||
+	     sksec->nlbl_state == NLBL_CONNLABELED)) {
 		netlbl_secattr_init(&secattr);
 		lock_sock(sk);
 		rc = netlbl_sock_getattr(sk, &secattr);
@@ -410,3 +453,51 @@ int selinux_netlbl_socket_setsockopt(struct socket *sock,
 
 	return rc;
 }
+
+/**
+ * selinux_netlbl_socket_connect - Label a client-side socket on connect
+ * @sk: the socket to label
+ * @addr: the destination address
+ *
+ * Description:
+ * Attempt to label a connected socket with NetLabel using the given address.
+ * Returns zero values on success, negative values on failure.
+ *
+ */
+int selinux_netlbl_socket_connect(struct sock *sk, struct sockaddr *addr)
+{
+	int rc;
+	struct sk_security_struct *sksec = sk->sk_security;
+	struct netlbl_lsm_secattr secattr;
+
+	if (sksec->nlbl_state != NLBL_REQSKB &&
+	    sksec->nlbl_state != NLBL_CONNLABELED)
+		return 0;
+
+	netlbl_secattr_init(&secattr);
+	local_bh_disable();
+	bh_lock_sock_nested(sk);
+
+	/* connected sockets are allowed to disconnect when the address family
+	 * is set to AF_UNSPEC, if that is what is happening we want to reset
+	 * the socket */
+	if (addr->sa_family == AF_UNSPEC) {
+		netlbl_sock_delattr(sk);
+		sksec->nlbl_state = NLBL_REQSKB;
+		rc = 0;
+		goto socket_connect_return;
+	}
+	rc = security_netlbl_sid_to_secattr(sksec->sid, &secattr);
+	if (rc != 0)
+		goto socket_connect_return;
+	rc = netlbl_conn_setattr(sk, addr, &secattr);
+	if (rc != 0)
+		goto socket_connect_return;
+	sksec->nlbl_state = NLBL_CONNLABELED;
+
+socket_connect_return:
+	bh_unlock_sock(sk);
+	local_bh_enable();
+	netlbl_secattr_destroy(&secattr);
+	return rc;
+}
-- 
cgit v1.2.3


From 6c5b3fc0147f79d714d2fe748b5869d7892ef2e7 Mon Sep 17 00:00:00 2001
From: Paul Moore <paul.moore@hp.com>
Date: Fri, 10 Oct 2008 10:16:33 -0400
Subject: selinux: Cache NetLabel secattrs in the socket's security struct

Previous work enabled the use of address based NetLabel selectors, which
while highly useful, brought the potential for additional per-packet overhead
when used.  This patch attempts to mitigate some of that overhead by caching
the NetLabel security attribute struct within the SELinux socket security
structure.  This should help eliminate the need to recreate the NetLabel
secattr structure for each packet resulting in less overhead.

Signed-off-by: Paul Moore <paul.moore@hp.com>
Acked-by: James Morris <jmorris@namei.org>
---
 security/selinux/hooks.c            |   1 +
 security/selinux/include/netlabel.h |   7 +++
 security/selinux/include/objsec.h   |   7 ++-
 security/selinux/netlabel.c         | 115 +++++++++++++++++++++++++-----------
 4 files changed, 91 insertions(+), 39 deletions(-)

(limited to 'security')

diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 632ac3e80a61..3aa811eba25b 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -291,6 +291,7 @@ static void sk_free_security(struct sock *sk)
 	struct sk_security_struct *ssec = sk->sk_security;
 
 	sk->sk_security = NULL;
+	selinux_netlbl_sk_security_free(ssec);
 	kfree(ssec);
 }
 
diff --git a/security/selinux/include/netlabel.h b/security/selinux/include/netlabel.h
index 982bac0ac328..b913c8d06038 100644
--- a/security/selinux/include/netlabel.h
+++ b/security/selinux/include/netlabel.h
@@ -41,6 +41,7 @@ void selinux_netlbl_cache_invalidate(void);
 
 void selinux_netlbl_err(struct sk_buff *skb, int error, int gateway);
 
+void selinux_netlbl_sk_security_free(struct sk_security_struct *ssec);
 void selinux_netlbl_sk_security_reset(struct sk_security_struct *ssec,
 				      int family);
 
@@ -77,6 +78,12 @@ static inline void selinux_netlbl_err(struct sk_buff *skb,
 	return;
 }
 
+static inline void selinux_netlbl_sk_security_free(
+					       struct sk_security_struct *ssec)
+{
+	return;
+}
+
 static inline void selinux_netlbl_sk_security_reset(
 					       struct sk_security_struct *ssec,
 					       int family)
diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h
index ad34787c6c02..f8be8d7fa26d 100644
--- a/security/selinux/include/objsec.h
+++ b/security/selinux/include/objsec.h
@@ -109,9 +109,6 @@ struct netport_security_struct {
 };
 
 struct sk_security_struct {
-	u32 sid;			/* SID of this object */
-	u32 peer_sid;			/* SID of peer */
-	u16 sclass;			/* sock security class */
 #ifdef CONFIG_NETLABEL
 	enum {				/* NetLabel state */
 		NLBL_UNSET = 0,
@@ -120,7 +117,11 @@ struct sk_security_struct {
 		NLBL_REQSKB,
 		NLBL_CONNLABELED,
 	} nlbl_state;
+	struct netlbl_lsm_secattr *nlbl_secattr; /* NetLabel sec attributes */
 #endif
+	u32 sid;			/* SID of this object */
+	u32 peer_sid;			/* SID of peer */
+	u16 sclass;			/* sock security class */
 };
 
 struct key_security_struct {
diff --git a/security/selinux/netlabel.c b/security/selinux/netlabel.c
index b22b7dafa0e3..f58701a7b728 100644
--- a/security/selinux/netlabel.c
+++ b/security/selinux/netlabel.c
@@ -67,6 +67,38 @@ static int selinux_netlbl_sidlookup_cached(struct sk_buff *skb,
 	return rc;
 }
 
+/**
+ * selinux_netlbl_sock_genattr - Generate the NetLabel socket secattr
+ * @sk: the socket
+ *
+ * Description:
+ * Generate the NetLabel security attributes for a socket, making full use of
+ * the socket's attribute cache.  Returns a pointer to the security attributes
+ * on success, NULL on failure.
+ *
+ */
+static struct netlbl_lsm_secattr *selinux_netlbl_sock_genattr(struct sock *sk)
+{
+	int rc;
+	struct sk_security_struct *sksec = sk->sk_security;
+	struct netlbl_lsm_secattr *secattr;
+
+	if (sksec->nlbl_secattr != NULL)
+		return sksec->nlbl_secattr;
+
+	secattr = netlbl_secattr_alloc(GFP_ATOMIC);
+	if (secattr == NULL)
+		return NULL;
+	rc = security_netlbl_sid_to_secattr(sksec->sid, secattr);
+	if (rc != 0) {
+		netlbl_secattr_free(secattr);
+		return NULL;
+	}
+	sksec->nlbl_secattr = secattr;
+
+	return secattr;
+}
+
 /**
  * selinux_netlbl_sock_setsid - Label a socket using the NetLabel mechanism
  * @sk: the socket to label
@@ -80,17 +112,15 @@ static int selinux_netlbl_sock_setsid(struct sock *sk)
 {
 	int rc;
 	struct sk_security_struct *sksec = sk->sk_security;
-	struct netlbl_lsm_secattr secattr;
+	struct netlbl_lsm_secattr *secattr;
 
 	if (sksec->nlbl_state != NLBL_REQUIRE)
 		return 0;
 
-	netlbl_secattr_init(&secattr);
-
-	rc = security_netlbl_sid_to_secattr(sksec->sid, &secattr);
-	if (rc != 0)
-		goto sock_setsid_return;
-	rc = netlbl_sock_setattr(sk, &secattr);
+	secattr = selinux_netlbl_sock_genattr(sk);
+	if (secattr == NULL)
+		return -ENOMEM;
+	rc = netlbl_sock_setattr(sk, secattr);
 	switch (rc) {
 	case 0:
 		sksec->nlbl_state = NLBL_LABELED;
@@ -101,8 +131,6 @@ static int selinux_netlbl_sock_setsid(struct sock *sk)
 		break;
 	}
 
-sock_setsid_return:
-	netlbl_secattr_destroy(&secattr);
 	return rc;
 }
 
@@ -136,6 +164,20 @@ void selinux_netlbl_err(struct sk_buff *skb, int error, int gateway)
 	netlbl_skbuff_err(skb, error, gateway);
 }
 
+/**
+ * selinux_netlbl_sk_security_free - Free the NetLabel fields
+ * @sssec: the sk_security_struct
+ *
+ * Description:
+ * Free all of the memory in the NetLabel fields of a sk_security_struct.
+ *
+ */
+void selinux_netlbl_sk_security_free(struct sk_security_struct *ssec)
+{
+	if (ssec->nlbl_secattr != NULL)
+		netlbl_secattr_free(ssec->nlbl_secattr);
+}
+
 /**
  * selinux_netlbl_sk_security_reset - Reset the NetLabel fields
  * @ssec: the sk_security_struct
@@ -209,7 +251,8 @@ int selinux_netlbl_skbuff_setsid(struct sk_buff *skb,
 				 u32 sid)
 {
 	int rc;
-	struct netlbl_lsm_secattr secattr;
+	struct netlbl_lsm_secattr secattr_storage;
+	struct netlbl_lsm_secattr *secattr = NULL;
 	struct sock *sk;
 
 	/* if this is a locally generated packet check to see if it is already
@@ -219,16 +262,21 @@ int selinux_netlbl_skbuff_setsid(struct sk_buff *skb,
 		struct sk_security_struct *sksec = sk->sk_security;
 		if (sksec->nlbl_state != NLBL_REQSKB)
 			return 0;
+		secattr = sksec->nlbl_secattr;
+	}
+	if (secattr == NULL) {
+		secattr = &secattr_storage;
+		netlbl_secattr_init(secattr);
+		rc = security_netlbl_sid_to_secattr(sid, secattr);
+		if (rc != 0)
+			goto skbuff_setsid_return;
 	}
 
-	netlbl_secattr_init(&secattr);
-	rc = security_netlbl_sid_to_secattr(sid, &secattr);
-	if (rc != 0)
-		goto skbuff_setsid_return;
-	rc = netlbl_skbuff_setattr(skb, family, &secattr);
+	rc = netlbl_skbuff_setattr(skb, family, secattr);
 
 skbuff_setsid_return:
-	netlbl_secattr_destroy(&secattr);
+	if (secattr == &secattr_storage)
+		netlbl_secattr_destroy(secattr);
 	return rc;
 }
 
@@ -245,18 +293,18 @@ void selinux_netlbl_inet_conn_established(struct sock *sk, u16 family)
 {
 	int rc;
 	struct sk_security_struct *sksec = sk->sk_security;
-	struct netlbl_lsm_secattr secattr;
+	struct netlbl_lsm_secattr *secattr;
 	struct inet_sock *sk_inet = inet_sk(sk);
 	struct sockaddr_in addr;
 
 	if (sksec->nlbl_state != NLBL_REQUIRE)
 		return;
 
-	netlbl_secattr_init(&secattr);
-	if (security_netlbl_sid_to_secattr(sksec->sid, &secattr) != 0)
-		goto inet_conn_established_return;
+	secattr = selinux_netlbl_sock_genattr(sk);
+	if (secattr == NULL)
+		return;
 
-	rc = netlbl_sock_setattr(sk, &secattr);
+	rc = netlbl_sock_setattr(sk, secattr);
 	switch (rc) {
 	case 0:
 		sksec->nlbl_state = NLBL_LABELED;
@@ -266,13 +314,13 @@ void selinux_netlbl_inet_conn_established(struct sock *sk, u16 family)
 		 * labeling protocols */
 		if (family != PF_INET) {
 			sksec->nlbl_state = NLBL_UNSET;
-			goto inet_conn_established_return;
+			return;
 		}
 
 		addr.sin_family = family;
 		addr.sin_addr.s_addr = sk_inet->daddr;
 		if (netlbl_conn_setattr(sk, (struct sockaddr *)&addr,
-					&secattr) != 0) {
+					secattr) != 0) {
 			/* we failed to label the connected socket (could be
 			 * for a variety of reasons, the actual "why" isn't
 			 * important here) so we have to go to our backup plan,
@@ -300,10 +348,6 @@ void selinux_netlbl_inet_conn_established(struct sock *sk, u16 family)
 		 * return an error code */
 		break;
 	}
-
-inet_conn_established_return:
-	netlbl_secattr_destroy(&secattr);
-	return;
 }
 
 /**
@@ -468,13 +512,12 @@ int selinux_netlbl_socket_connect(struct sock *sk, struct sockaddr *addr)
 {
 	int rc;
 	struct sk_security_struct *sksec = sk->sk_security;
-	struct netlbl_lsm_secattr secattr;
+	struct netlbl_lsm_secattr *secattr;
 
 	if (sksec->nlbl_state != NLBL_REQSKB &&
 	    sksec->nlbl_state != NLBL_CONNLABELED)
 		return 0;
 
-	netlbl_secattr_init(&secattr);
 	local_bh_disable();
 	bh_lock_sock_nested(sk);
 
@@ -487,17 +530,17 @@ int selinux_netlbl_socket_connect(struct sock *sk, struct sockaddr *addr)
 		rc = 0;
 		goto socket_connect_return;
 	}
-	rc = security_netlbl_sid_to_secattr(sksec->sid, &secattr);
-	if (rc != 0)
+	secattr = selinux_netlbl_sock_genattr(sk);
+	if (secattr == NULL) {
+		rc = -ENOMEM;
 		goto socket_connect_return;
-	rc = netlbl_conn_setattr(sk, addr, &secattr);
-	if (rc != 0)
-		goto socket_connect_return;
-	sksec->nlbl_state = NLBL_CONNLABELED;
+	}
+	rc = netlbl_conn_setattr(sk, addr, secattr);
+	if (rc == 0)
+		sksec->nlbl_state = NLBL_CONNLABELED;
 
 socket_connect_return:
 	bh_unlock_sock(sk);
 	local_bh_enable();
-	netlbl_secattr_destroy(&secattr);
 	return rc;
 }
-- 
cgit v1.2.3


From 8d75899d033617316e06296b7c0729612f56aba0 Mon Sep 17 00:00:00 2001
From: Paul Moore <paul.moore@hp.com>
Date: Fri, 10 Oct 2008 10:16:33 -0400
Subject: netlabel: Changes to the NetLabel security attributes to allow LSMs
 to pass full contexts

This patch provides support for including the LSM's secid in addition to
the LSM's MLS information in the NetLabel security attributes structure.

Signed-off-by: Paul Moore <paul.moore@hp.com>
Acked-by: James Morris <jmorris@namei.org>
---
 include/net/netlabel.h         | 2 +-
 security/selinux/ss/services.c | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'security')

diff --git a/include/net/netlabel.h b/include/net/netlabel.h
index 074cad40ac66..d56517ac3bae 100644
--- a/include/net/netlabel.h
+++ b/include/net/netlabel.h
@@ -203,7 +203,7 @@ struct netlbl_lsm_secattr {
 	u32 type;
 	char *domain;
 	struct netlbl_lsm_cache *cache;
-	union {
+	struct {
 		struct {
 			struct netlbl_lsm_secattr_catmap *cat;
 			u32 lvl;
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index c8f688a10041..ed0ca649d7db 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -2803,7 +2803,8 @@ int security_netlbl_sid_to_secattr(u32 sid, struct netlbl_lsm_secattr *secattr)
 		rc = -ENOMEM;
 		goto netlbl_sid_to_secattr_failure;
 	}
-	secattr->flags |= NETLBL_SECATTR_DOMAIN_CPY;
+	secattr->attr.secid = sid;
+	secattr->flags |= NETLBL_SECATTR_DOMAIN_CPY | NETLBL_SECATTR_SECID;
 	mls_export_netlbl_lvl(ctx, secattr);
 	rc = mls_export_netlbl_cat(ctx, secattr);
 	if (rc != 0)
-- 
cgit v1.2.3


From 92562927826fceb2f8e69c89e28161b8c1e0b125 Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.vnet.ibm.com>
Date: Tue, 7 Oct 2008 14:00:12 -0400
Subject: integrity: special fs magic

Discussion on the mailing list questioned the use of these
magic values in userspace, concluding these values are already
exported to userspace via statfs and their correct/incorrect
usage is left up to the userspace application.

  - Move special fs magic number definitions to magic.h
  - Add magic.h include

Signed-off-by: Mimi Zohar <zohar@us.ibm.com>
Reviewed-by: James Morris <jmorris@namei.org>
Signed-off-by: James Morris <jmorris@namei.org>
---
 fs/debugfs/inode.c    | 3 +--
 include/linux/magic.h | 4 ++++
 mm/shmem.c            | 4 +---
 security/inode.c      | 3 +--
 4 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'security')

diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 08e28c9bb416..3dbe2169cf36 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -26,8 +26,7 @@
 #include <linux/debugfs.h>
 #include <linux/fsnotify.h>
 #include <linux/string.h>
-
-#define DEBUGFS_MAGIC	0x64626720
+#include <linux/magic.h>
 
 static struct vfsmount *debugfs_mount;
 static int debugfs_mount_count;
diff --git a/include/linux/magic.h b/include/linux/magic.h
index 1fa0c2ce4dec..f7f3fdddbef0 100644
--- a/include/linux/magic.h
+++ b/include/linux/magic.h
@@ -6,6 +6,10 @@
 #define AFS_SUPER_MAGIC                0x5346414F
 #define AUTOFS_SUPER_MAGIC	0x0187
 #define CODA_SUPER_MAGIC	0x73757245
+#define DEBUGFS_MAGIC          0x64626720
+#define SYSFS_MAGIC		0x62656572
+#define SECURITYFS_MAGIC	0x73636673
+#define TMPFS_MAGIC		0x01021994
 #define EFS_SUPER_MAGIC		0x414A53
 #define EXT2_SUPER_MAGIC	0xEF53
 #define EXT3_SUPER_MAGIC	0xEF53
diff --git a/mm/shmem.c b/mm/shmem.c
index 04fb4f1ab88e..bf66d0191baf 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -50,14 +50,12 @@
 #include <linux/migrate.h>
 #include <linux/highmem.h>
 #include <linux/seq_file.h>
+#include <linux/magic.h>
 
 #include <asm/uaccess.h>
 #include <asm/div64.h>
 #include <asm/pgtable.h>
 
-/* This magic number is used in glibc for posix shared memory */
-#define TMPFS_MAGIC	0x01021994
-
 #define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long))
 #define ENTRIES_PER_PAGEPAGE (ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
 #define BLOCKS_PER_PAGE  (PAGE_CACHE_SIZE/512)
diff --git a/security/inode.c b/security/inode.c
index ca4958ebad8d..efea5a605466 100644
--- a/security/inode.c
+++ b/security/inode.c
@@ -20,8 +20,7 @@
 #include <linux/init.h>
 #include <linux/namei.h>
 #include <linux/security.h>
-
-#define SECURITYFS_MAGIC	0x73636673
+#include <linux/magic.h>
 
 static struct vfsmount *mount;
 static int mount_count;
-- 
cgit v1.2.3


From 452a00d2ee288f2cbc36f676edd06cb14d2878c1 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Mon, 13 Oct 2008 10:39:13 +0100
Subject: tty: Make get_current_tty use a kref

We now return a kref covered tty reference. That ensures the tty structure
doesn't go away when you have a return from get_current_tty. This is not
enough to protect you from most of the resources being freed behind your
back - yet.

[Updated to include fixes for SELinux problems found by Andrew Morton and
 an s390 leak found while debugging the former]

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/tty_io.c      | 10 ++++++----
 drivers/s390/char/fs3270.c |  3 ++-
 fs/dquot.c                 |  6 +++---
 security/selinux/hooks.c   |  3 ++-
 4 files changed, 13 insertions(+), 9 deletions(-)

(limited to 'security')

diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 9a76db3cda1c..4c0e4ed31a48 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -786,12 +786,12 @@ void disassociate_ctty(int on_exit)
 	tty = get_current_tty();
 	if (tty) {
 		tty_pgrp = get_pid(tty->pgrp);
-		lock_kernel();
 		mutex_unlock(&tty_mutex);
-		/* XXX: here we race, there is nothing protecting tty */
+		lock_kernel();
 		if (on_exit && tty->driver->type != TTY_DRIVER_TYPE_PTY)
 			tty_vhangup(tty);
 		unlock_kernel();
+		tty_kref_put(tty);
 	} else if (on_exit) {
 		struct pid *old_pgrp;
 		spin_lock_irq(&current->sighand->siglock);
@@ -819,7 +819,6 @@ void disassociate_ctty(int on_exit)
 	spin_unlock_irq(&current->sighand->siglock);
 
 	mutex_lock(&tty_mutex);
-	/* It is possible that do_tty_hangup has free'd this tty */
 	tty = get_current_tty();
 	if (tty) {
 		unsigned long flags;
@@ -829,6 +828,7 @@ void disassociate_ctty(int on_exit)
 		tty->session = NULL;
 		tty->pgrp = NULL;
 		spin_unlock_irqrestore(&tty->ctrl_lock, flags);
+		tty_kref_put(tty);
 	} else {
 #ifdef TTY_DEBUG_HANGUP
 		printk(KERN_DEBUG "error attempted to write to tty [0x%p]"
@@ -1806,6 +1806,8 @@ retry_open:
 		index = tty->index;
 		filp->f_flags |= O_NONBLOCK; /* Don't let /dev/tty block */
 		/* noctty = 1; */
+		/* FIXME: Should we take a driver reference ? */
+		tty_kref_put(tty);
 		goto got_driver;
 	}
 #ifdef CONFIG_VT
@@ -3135,7 +3137,7 @@ struct tty_struct *get_current_tty(void)
 {
 	struct tty_struct *tty;
 	WARN_ON_ONCE(!mutex_is_locked(&tty_mutex));
-	tty = current->signal->tty;
+	tty = tty_kref_get(current->signal->tty);
 	/*
 	 * session->tty can be changed/cleared from under us, make sure we
 	 * issue the load. The obtained pointer, when not NULL, is valid as
diff --git a/drivers/s390/char/fs3270.c b/drivers/s390/char/fs3270.c
index d18e6d2e0b49..3ef5425d0eb8 100644
--- a/drivers/s390/char/fs3270.c
+++ b/drivers/s390/char/fs3270.c
@@ -430,11 +430,12 @@ fs3270_open(struct inode *inode, struct file *filp)
 		mutex_lock(&tty_mutex);
 		tty = get_current_tty();
 		if (!tty || tty->driver->major != IBM_TTY3270_MAJOR) {
-			mutex_unlock(&tty_mutex);
+			tty_kref_put(tty);
 			rc = -ENODEV;
 			goto out;
 		}
 		minor = tty->index + RAW3270_FIRSTMINOR;
+		tty_kref_put(tty);
 		mutex_unlock(&tty_mutex);
 	}
 	/* Check if some other program is already using fullscreen mode. */
diff --git a/fs/dquot.c b/fs/dquot.c
index 8ec4d6cc7633..7417a6ca3129 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -897,8 +897,9 @@ static void print_warning(struct dquot *dquot, const int warntype)
 
 	mutex_lock(&tty_mutex);
 	tty = get_current_tty();
+	mutex_unlock(&tty_mutex);
 	if (!tty)
-		goto out_lock;
+		return;
 	tty_write_message(tty, dquot->dq_sb->s_id);
 	if (warntype == QUOTA_NL_ISOFTWARN || warntype == QUOTA_NL_BSOFTWARN)
 		tty_write_message(tty, ": warning, ");
@@ -926,8 +927,7 @@ static void print_warning(struct dquot *dquot, const int warntype)
 			break;
 	}
 	tty_write_message(tty, msg);
-out_lock:
-	mutex_unlock(&tty_mutex);
+	tty_kref_put(tty);
 }
 #endif
 
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 4a7374c12d9c..089d61a23952 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2123,6 +2123,7 @@ static inline void flush_unauthorized_files(struct files_struct *files)
 
 	mutex_lock(&tty_mutex);
 	tty = get_current_tty();
+	mutex_unlock(&tty_mutex);
 	if (tty) {
 		file_list_lock();
 		file = list_entry(tty->tty_files.next, typeof(*file), f_u.fu_list);
@@ -2139,8 +2140,8 @@ static inline void flush_unauthorized_files(struct files_struct *files)
 			}
 		}
 		file_list_unlock();
+		tty_kref_put(tty);
 	}
-	mutex_unlock(&tty_mutex);
 	/* Reset controlling tty. */
 	if (drop_tty)
 		no_tty();
-- 
cgit v1.2.3


From 934e6ebf96e8c1a0f299e64129fdaebc1132a427 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Mon, 13 Oct 2008 10:40:43 +0100
Subject: tty: Redo current tty locking

Currently it is sometimes locked by the tty mutex and sometimes by the
sighand lock. The latter is in fact correct and now we can hand back referenced
objects we can fix this up without problems around sleeping functions.

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/tty_io.c      | 18 ++++--------------
 drivers/s390/char/fs3270.c |  1 +
 fs/dquot.c                 |  2 --
 security/selinux/hooks.c   |  2 --
 4 files changed, 5 insertions(+), 18 deletions(-)

(limited to 'security')

diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index b5f57d0b30ee..f40298e9873a 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -739,13 +739,11 @@ void tty_vhangup_self(void)
 {
 	struct tty_struct *tty;
 
-	mutex_lock(&tty_mutex);
 	tty = get_current_tty();
 	if (tty) {
 		tty_vhangup(tty);
 		tty_kref_put(tty);
 	}
-	mutex_unlock(&tty_mutex);
 }
 
 /**
@@ -801,11 +799,9 @@ void disassociate_ctty(int on_exit)
 	struct pid *tty_pgrp = NULL;
 
 
-	mutex_lock(&tty_mutex);
 	tty = get_current_tty();
 	if (tty) {
 		tty_pgrp = get_pid(tty->pgrp);
-		mutex_unlock(&tty_mutex);
 		lock_kernel();
 		if (on_exit && tty->driver->type != TTY_DRIVER_TYPE_PTY)
 			tty_vhangup(tty);
@@ -822,7 +818,6 @@ void disassociate_ctty(int on_exit)
 			kill_pgrp(old_pgrp, SIGCONT, on_exit);
 			put_pid(old_pgrp);
 		}
-		mutex_unlock(&tty_mutex);
 		return;
 	}
 	if (tty_pgrp) {
@@ -837,7 +832,6 @@ void disassociate_ctty(int on_exit)
 	current->signal->tty_old_pgrp = NULL;
 	spin_unlock_irq(&current->sighand->siglock);
 
-	mutex_lock(&tty_mutex);
 	tty = get_current_tty();
 	if (tty) {
 		unsigned long flags;
@@ -854,7 +848,6 @@ void disassociate_ctty(int on_exit)
 		       " = NULL", tty);
 #endif
 	}
-	mutex_unlock(&tty_mutex);
 
 	/* Now clear signal->tty under the lock */
 	read_lock(&tasklist_lock);
@@ -3180,14 +3173,11 @@ static void proc_set_tty(struct task_struct *tsk, struct tty_struct *tty)
 struct tty_struct *get_current_tty(void)
 {
 	struct tty_struct *tty;
-	WARN_ON_ONCE(!mutex_is_locked(&tty_mutex));
+	unsigned long flags;
+
+	spin_lock_irqsave(&current->sighand->siglock, flags);
 	tty = tty_kref_get(current->signal->tty);
-	/*
-	 * session->tty can be changed/cleared from under us, make sure we
-	 * issue the load. The obtained pointer, when not NULL, is valid as
-	 * long as we hold tty_mutex.
-	 */
-	barrier();
+	spin_unlock_irqrestore(&current->sighand->siglock, flags);
 	return tty;
 }
 EXPORT_SYMBOL_GPL(get_current_tty);
diff --git a/drivers/s390/char/fs3270.c b/drivers/s390/char/fs3270.c
index 3ef5425d0eb8..84fbc90480dc 100644
--- a/drivers/s390/char/fs3270.c
+++ b/drivers/s390/char/fs3270.c
@@ -431,6 +431,7 @@ fs3270_open(struct inode *inode, struct file *filp)
 		tty = get_current_tty();
 		if (!tty || tty->driver->major != IBM_TTY3270_MAJOR) {
 			tty_kref_put(tty);
+			mutex_unlock(&tty_mutex);
 			rc = -ENODEV;
 			goto out;
 		}
diff --git a/fs/dquot.c b/fs/dquot.c
index 7417a6ca3129..ad7e59003e04 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -895,9 +895,7 @@ static void print_warning(struct dquot *dquot, const int warntype)
 	    warntype == QUOTA_NL_BSOFTBELOW || !need_print_warning(dquot))
 		return;
 
-	mutex_lock(&tty_mutex);
 	tty = get_current_tty();
-	mutex_unlock(&tty_mutex);
 	if (!tty)
 		return;
 	tty_write_message(tty, dquot->dq_sb->s_id);
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 089d61a23952..48881394fbd4 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2121,9 +2121,7 @@ static inline void flush_unauthorized_files(struct files_struct *files)
 	long j = -1;
 	int drop_tty = 0;
 
-	mutex_lock(&tty_mutex);
 	tty = get_current_tty();
-	mutex_unlock(&tty_mutex);
 	if (tty) {
 		file_list_lock();
 		file = list_entry(tty->tty_files.next, typeof(*file), f_u.fu_list);
-- 
cgit v1.2.3


From a447c0932445f92ce6f4c1bd020f62c5097a7842 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Mon, 13 Oct 2008 10:46:57 +0100
Subject: vfs: Use const for kernel parser table

This is a much better version of a previous patch to make the parser
tables constant. Rather than changing the typedef, we put the "const" in
all the various places where its required, allowing the __initconst
exception for nfsroot which was the cause of the previous trouble.

This was posted for review some time ago and I believe its been in -mm
since then.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Cc: Alexander Viro <aviro@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/platforms/cell/spufs/inode.c | 2 +-
 arch/s390/hypfs/inode.c                   | 2 +-
 drivers/infiniband/ulp/srp/ib_srp.c       | 2 +-
 drivers/usb/core/inode.c                  | 2 +-
 fs/9p/v9fs.c                              | 2 +-
 fs/adfs/super.c                           | 2 +-
 fs/affs/super.c                           | 2 +-
 fs/afs/super.c                            | 2 +-
 fs/autofs/inode.c                         | 2 +-
 fs/autofs4/inode.c                        | 2 +-
 fs/befs/linuxvfs.c                        | 2 +-
 fs/devpts/inode.c                         | 2 +-
 fs/ecryptfs/main.c                        | 2 +-
 fs/ext2/super.c                           | 2 +-
 fs/ext3/super.c                           | 2 +-
 fs/ext4/super.c                           | 2 +-
 fs/fat/inode.c                            | 6 +++---
 fs/fuse/inode.c                           | 2 +-
 fs/gfs2/mount.c                           | 2 +-
 fs/hfs/super.c                            | 2 +-
 fs/hfsplus/options.c                      | 2 +-
 fs/hpfs/super.c                           | 2 +-
 fs/hugetlbfs/inode.c                      | 2 +-
 fs/isofs/inode.c                          | 2 +-
 fs/jfs/super.c                            | 2 +-
 fs/nfs/nfsroot.c                          | 2 +-
 fs/nfs/super.c                            | 6 +++---
 fs/ocfs2/super.c                          | 2 +-
 fs/omfs/inode.c                           | 2 +-
 fs/ubifs/super.c                          | 2 +-
 fs/udf/super.c                            | 2 +-
 fs/ufs/super.c                            | 4 ++--
 fs/xfs/linux-2.6/xfs_super.c              | 2 +-
 include/linux/parser.h                    | 2 +-
 lib/parser.c                              | 2 +-
 net/9p/client.c                           | 2 +-
 net/9p/trans_fd.c                         | 2 +-
 security/selinux/hooks.c                  | 2 +-
 38 files changed, 43 insertions(+), 43 deletions(-)

(limited to 'security')

diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 690ca7b0dcf6..2c8b8091250f 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -659,7 +659,7 @@ enum {
 	Opt_uid, Opt_gid, Opt_mode, Opt_debug, Opt_err,
 };
 
-static match_table_t spufs_tokens = {
+static const match_table_t spufs_tokens = {
 	{ Opt_uid,   "uid=%d" },
 	{ Opt_gid,   "gid=%d" },
 	{ Opt_mode,  "mode=%o" },
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index 7383781f3e6a..36313801cd5c 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -219,7 +219,7 @@ static int hypfs_release(struct inode *inode, struct file *filp)
 
 enum { opt_uid, opt_gid, opt_err };
 
-static match_table_t hypfs_tokens = {
+static const match_table_t hypfs_tokens = {
 	{opt_uid, "uid=%u"},
 	{opt_gid, "gid=%u"},
 	{opt_err, NULL}
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index ed7c5f72cb8b..5b8b533f2908 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -1683,7 +1683,7 @@ enum {
 				   SRP_OPT_SERVICE_ID),
 };
 
-static match_table_t srp_opt_tokens = {
+static const match_table_t srp_opt_tokens = {
 	{ SRP_OPT_ID_EXT,		"id_ext=%s" 		},
 	{ SRP_OPT_IOC_GUID,		"ioc_guid=%s" 		},
 	{ SRP_OPT_DGID,			"dgid=%s" 		},
diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c
index db410e92c80d..77fa7a080801 100644
--- a/drivers/usb/core/inode.c
+++ b/drivers/usb/core/inode.c
@@ -97,7 +97,7 @@ enum {
 	Opt_err,
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_devuid, "devuid=%u"},
 	{Opt_devgid, "devgid=%u"},
 	{Opt_devmode, "devmode=%o"},
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 047c791427aa..c061c3f18e7c 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -55,7 +55,7 @@ enum {
 	Opt_err
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_debug, "debug=%x"},
 	{Opt_dfltuid, "dfltuid=%u"},
 	{Opt_dfltgid, "dfltgid=%u"},
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 26f3b43726bb..7f83a46f2b7e 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -157,7 +157,7 @@ static int adfs_show_options(struct seq_file *seq, struct vfsmount *mnt)
 
 enum {Opt_uid, Opt_gid, Opt_ownmask, Opt_othmask, Opt_err};
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_uid, "uid=%u"},
 	{Opt_gid, "gid=%u"},
 	{Opt_ownmask, "ownmask=%o"},
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 3a89094f93d0..8989c93193ed 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -135,7 +135,7 @@ enum {
 	Opt_verbose, Opt_volume, Opt_ignore, Opt_err,
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_bs, "bs=%u"},
 	{Opt_mode, "mode=%o"},
 	{Opt_mufs, "mufs"},
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 250d8c4d66e4..aee239a048cb 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -64,7 +64,7 @@ enum {
 	afs_opt_vol,
 };
 
-static match_table_t afs_options_list = {
+static const match_table_t afs_options_list = {
 	{ afs_opt_cell,		"cell=%s"	},
 	{ afs_opt_rwpath,	"rwpath"	},
 	{ afs_opt_vol,		"vol=%s"	},
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index dda510d31f84..b70eea1e8c59 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -59,7 +59,7 @@ static const struct super_operations autofs_sops = {
 
 enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto};
 
-static match_table_t autofs_tokens = {
+static const match_table_t autofs_tokens = {
 	{Opt_fd, "fd=%u"},
 	{Opt_uid, "uid=%u"},
 	{Opt_gid, "gid=%u"},
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 7bb3e5ba0537..45d55819203d 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -213,7 +213,7 @@ static const struct super_operations autofs4_sops = {
 enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto,
 	Opt_indirect, Opt_direct, Opt_offset};
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_fd, "fd=%u"},
 	{Opt_uid, "uid=%u"},
 	{Opt_gid, "gid=%u"},
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 740f53672a8a..9286b2af893a 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -650,7 +650,7 @@ enum {
 	Opt_uid, Opt_gid, Opt_charset, Opt_debug, Opt_err,
 };
 
-static match_table_t befs_tokens = {
+static const match_table_t befs_tokens = {
 	{Opt_uid, "uid=%d"},
 	{Opt_gid, "gid=%d"},
 	{Opt_charset, "iocharset=%s"},
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index a70d5d0890c7..4a714f6c1bed 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -49,7 +49,7 @@ enum {
 	Opt_err
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_uid, "uid=%u"},
 	{Opt_gid, "gid=%u"},
 	{Opt_mode, "mode=%o"},
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 448dfd597b5f..8ebe9a5d1d99 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -211,7 +211,7 @@ enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig,
        ecryptfs_opt_passthrough, ecryptfs_opt_xattr_metadata,
        ecryptfs_opt_encrypted_view, ecryptfs_opt_err };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{ecryptfs_opt_sig, "sig=%s"},
 	{ecryptfs_opt_ecryptfs_sig, "ecryptfs_sig=%s"},
 	{ecryptfs_opt_cipher, "cipher=%s"},
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index fd88c7b43e66..647cd888ac87 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -393,7 +393,7 @@ enum {
 	Opt_usrquota, Opt_grpquota, Opt_reservation, Opt_noreservation
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_bsd_df, "bsddf"},
 	{Opt_minix_df, "minixdf"},
 	{Opt_grpid, "grpid"},
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index f38a5afc39a1..399a96a6c556 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -760,7 +760,7 @@ enum {
 	Opt_grpquota
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_bsd_df, "bsddf"},
 	{Opt_minix_df, "minixdf"},
 	{Opt_grpid, "grpid"},
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index fb940c22ab0d..dea8f13c2fd9 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -919,7 +919,7 @@ enum {
 	Opt_inode_readahead_blks
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_bsd_df, "bsddf"},
 	{Opt_minix_df, "minixdf"},
 	{Opt_grpid, "grpid"},
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 80ff3381fa21..d12cdf2a0406 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -855,7 +855,7 @@ enum {
 	Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_err,
 };
 
-static match_table_t fat_tokens = {
+static const match_table_t fat_tokens = {
 	{Opt_check_r, "check=relaxed"},
 	{Opt_check_s, "check=strict"},
 	{Opt_check_n, "check=normal"},
@@ -890,14 +890,14 @@ static match_table_t fat_tokens = {
 	{Opt_tz_utc, "tz=UTC"},
 	{Opt_err, NULL},
 };
-static match_table_t msdos_tokens = {
+static const match_table_t msdos_tokens = {
 	{Opt_nodots, "nodots"},
 	{Opt_nodots, "dotsOK=no"},
 	{Opt_dots, "dots"},
 	{Opt_dots, "dotsOK=yes"},
 	{Opt_err, NULL}
 };
-static match_table_t vfat_tokens = {
+static const match_table_t vfat_tokens = {
 	{Opt_charset, "iocharset=%s"},
 	{Opt_shortname_lower, "shortname=lower"},
 	{Opt_shortname_win95, "shortname=win95"},
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index d2249f174e20..6a84388cacff 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -354,7 +354,7 @@ enum {
 	OPT_ERR
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{OPT_FD,			"fd=%u"},
 	{OPT_ROOTMODE,			"rootmode=%o"},
 	{OPT_USER_ID,			"user_id=%u"},
diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c
index df48333e6f01..f96eb90a2cfa 100644
--- a/fs/gfs2/mount.c
+++ b/fs/gfs2/mount.c
@@ -46,7 +46,7 @@ enum {
 	Opt_err,
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_lockproto, "lockproto=%s"},
 	{Opt_locktable, "locktable=%s"},
 	{Opt_hostdata, "hostdata=%s"},
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 4abb1047c689..3c7c7637719c 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -173,7 +173,7 @@ enum {
 	opt_err
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{ opt_uid, "uid=%u" },
 	{ opt_gid, "gid=%u" },
 	{ opt_umask, "umask=%o" },
diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c
index 9997cbf8beb5..9699c56d323f 100644
--- a/fs/hfsplus/options.c
+++ b/fs/hfsplus/options.c
@@ -25,7 +25,7 @@ enum {
 	opt_force, opt_err
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{ opt_creator, "creator=%s" },
 	{ opt_type, "type=%s" },
 	{ opt_umask, "umask=%o" },
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index b8ae9c90ada0..29ad461d568f 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -215,7 +215,7 @@ enum {
 	Opt_timeshift, Opt_err,
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_help, "help"},
 	{Opt_uid, "uid=%u"},
 	{Opt_gid, "gid=%u"},
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 3f58923fb39b..61edc701b0e6 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -57,7 +57,7 @@ enum {
 	Opt_err,
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_size,	"size=%s"},
 	{Opt_nr_inodes,	"nr_inodes=%s"},
 	{Opt_mode,	"mode=%o"},
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 26948a6033b6..3f8af0f1505b 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -310,7 +310,7 @@ enum {
 	Opt_nocompress, Opt_hide, Opt_showassoc, Opt_dmode,
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_norock, "norock"},
 	{Opt_nojoliet, "nojoliet"},
 	{Opt_unhide, "unhide"},
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 3630718be395..0dae345e481b 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -199,7 +199,7 @@ enum {
 	Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_integrity, "integrity"},
 	{Opt_nointegrity, "nointegrity"},
 	{Opt_iocharset, "iocharset=%s"},
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 46763d1cd397..8478fc25daee 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -127,7 +127,7 @@ enum {
 	Opt_err
 };
 
-static match_table_t __initdata tokens = {
+static match_table_t __initconst tokens = {
 	{Opt_port, "port=%u"},
 	{Opt_rsize, "rsize=%u"},
 	{Opt_wsize, "wsize=%u"},
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index e9b20173fef3..ffb697416cb1 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -98,7 +98,7 @@ enum {
 	Opt_err
 };
 
-static match_table_t nfs_mount_option_tokens = {
+static const match_table_t nfs_mount_option_tokens = {
 	{ Opt_userspace, "bg" },
 	{ Opt_userspace, "fg" },
 	{ Opt_userspace, "retry=%s" },
@@ -163,7 +163,7 @@ enum {
 	Opt_xprt_err
 };
 
-static match_table_t nfs_xprt_protocol_tokens = {
+static const match_table_t nfs_xprt_protocol_tokens = {
 	{ Opt_xprt_udp, "udp" },
 	{ Opt_xprt_tcp, "tcp" },
 	{ Opt_xprt_rdma, "rdma" },
@@ -180,7 +180,7 @@ enum {
 	Opt_sec_err
 };
 
-static match_table_t nfs_secflavor_tokens = {
+static const match_table_t nfs_secflavor_tokens = {
 	{ Opt_sec_none, "none" },
 	{ Opt_sec_none, "null" },
 	{ Opt_sec_sys, "sys" },
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 88255d3f52b4..70334d85aff1 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -157,7 +157,7 @@ enum {
 	Opt_err,
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_barrier, "barrier=%u"},
 	{Opt_err_panic, "errors=panic"},
 	{Opt_err_ro, "errors=remount-ro"},
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index d29047b1b9b0..cbf047a847c5 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -346,7 +346,7 @@ enum {
 	Opt_uid, Opt_gid, Opt_umask, Opt_dmask, Opt_fmask
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_uid, "uid=%u"},
 	{Opt_gid, "gid=%u"},
 	{Opt_umask, "umask=%o"},
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 3f4902060c7a..9a9220333b3b 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -848,7 +848,7 @@ enum {
 	Opt_err,
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_fast_unmount, "fast_unmount"},
 	{Opt_norm_unmount, "norm_unmount"},
 	{Opt_err, NULL},
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 5698bbf83bbf..e25e7010627b 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -369,7 +369,7 @@ enum {
 	Opt_err, Opt_uforget, Opt_uignore, Opt_gforget, Opt_gignore
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_novrs,	"novrs"},
 	{Opt_nostrict,	"nostrict"},
 	{Opt_bs,	"bs=%u"},
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 3141969b456d..e65212dfb60e 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -309,7 +309,7 @@ enum {
        Opt_err
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_type_old, "ufstype=old"},
 	{Opt_type_sunx86, "ufstype=sunx86"},
 	{Opt_type_sun, "ufstype=sun"},
@@ -1233,7 +1233,7 @@ static int ufs_show_options(struct seq_file *seq, struct vfsmount *vfs)
 {
 	struct ufs_sb_info *sbi = UFS_SB(vfs->mnt_sb);
 	unsigned mval = sbi->s_mount_opt & UFS_MOUNT_UFSTYPE;
-	struct match_token *tp = tokens;
+	const struct match_token *tp = tokens;
 
 	while (tp->token != Opt_onerror_panic && tp->token != mval)
 		++tp;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 18d3c8487835..7227b2efef22 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -158,7 +158,7 @@ enum {
 	Opt_barrier, Opt_nobarrier, Opt_err
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_barrier, "barrier"},
 	{Opt_nobarrier, "nobarrier"},
 	{Opt_err, NULL}
diff --git a/include/linux/parser.h b/include/linux/parser.h
index 7dcd05075756..ea2281e726f6 100644
--- a/include/linux/parser.h
+++ b/include/linux/parser.h
@@ -25,7 +25,7 @@ typedef struct {
 	char *to;
 } substring_t;
 
-int match_token(char *, match_table_t table, substring_t args[]);
+int match_token(char *, const match_table_t table, substring_t args[]);
 int match_int(substring_t *, int *result);
 int match_octal(substring_t *, int *result);
 int match_hex(substring_t *, int *result);
diff --git a/lib/parser.c b/lib/parser.c
index 4f0cbc03e0e8..b00d02059a5f 100644
--- a/lib/parser.c
+++ b/lib/parser.c
@@ -100,7 +100,7 @@ static int match_one(char *s, const char *p, substring_t args[])
  * format identifiers which will be taken into account when matching the
  * tokens, and whose locations will be returned in the @args array.
  */
-int match_token(char *s, match_table_t table, substring_t args[])
+int match_token(char *s, const match_table_t table, substring_t args[])
 {
 	const struct match_token *p;
 
diff --git a/net/9p/client.c b/net/9p/client.c
index 10e320307ec0..e053e06028a5 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -52,7 +52,7 @@ enum {
 	Opt_err,
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_msize, "msize=%u"},
 	{Opt_legacy, "noextend"},
 	{Opt_trans, "trans=%s"},
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index d652baf5ff91..6dabbdb66651 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -86,7 +86,7 @@ enum {
 	Opt_port, Opt_rfdno, Opt_wfdno, Opt_err,
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_port, "port=%u"},
 	{Opt_rfdno, "rfdno=%u"},
 	{Opt_wfdno, "wfdno=%u"},
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 88f19536efad..576e51199079 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -325,7 +325,7 @@ enum {
 	Opt_rootcontext = 4,
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_context, CONTEXT_STR "%s"},
 	{Opt_fscontext, FSCONTEXT_STR "%s"},
 	{Opt_defcontext, DEFCONTEXT_STR "%s"},
-- 
cgit v1.2.3