Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fixes from Ingo Molnar:
 "This fixes the cputime scaling overflow problems for good without
  having bad 32-bit overhead, and gets rid of the div64_u64_rem() helper
  as well."

* 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  Revert "math64: New div64_u64_rem helper"
  sched: Avoid prev->stime underflow
  sched: Do not account bogus utime
  sched: Avoid cputime scaling overflow
2013-05-02 14:56:31 -07:00 · 2013-05-02 14:56:31 -07:00 · 0279b3c0ad
commit 0279b3c0ad
parent 797994f81a f300213415
3 changed files with 58 additions and 60 deletions
--- a/include/linux/math64.h
+++ b/include/linux/math64.h
@ -29,15 +29,6 @@ static inline s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder)
 	return dividend / divisor;
 }
 /**
 * div64_u64_rem - unsigned 64bit divide with 64bit divisor
 */
 static inline u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder)
 {
 	*remainder = dividend % divisor;
 	return dividend / divisor;
 }
 /**
 * div64_u64 - unsigned 64bit divide with 64bit divisor
 */
@ -70,16 +61,8 @@ static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder)
 extern s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder);
 #endif
 #ifndef div64_u64_rem
 extern u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder);
 #endif
 #ifndef div64_u64
-static inline u64 div64_u64(u64 dividend, u64 divisor)
+extern u64 div64_u64(u64 dividend, u64 divisor);
 {
 	u64 remainder;
 	return div64_u64_rem(dividend, divisor, &remainder);
 }
 #endif
 #ifndef div64_s64
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@ -506,34 +506,47 @@ void account_idle_ticks(unsigned long ticks)
 }
 /*
- * Perform (stime * rtime) / total with reduced chances
+ * Perform (stime * rtime) / total, but avoid multiplication overflow by
- * of multiplication overflows by using smaller factors
+ * losing precision when the numbers are big.
 * like quotient and remainders of divisions between
 * rtime and total.
 */
 static cputime_t scale_stime(u64 stime, u64 rtime, u64 total)
 {
-	u64 rem, res, scaled;
+	u64 scaled;
-	if (rtime >= total) {
+	for (;;) {
-		/*
+		/* Make sure "rtime" is the bigger of stime/rtime */
-		 * Scale up to rtime / total then add
+		if (stime > rtime) {
-		 * the remainder scaled to stime / total.
+			u64 tmp = rtime; rtime = stime; stime = tmp;
-		 */
+		}
-		res = div64_u64_rem(rtime, total, &rem);
+
-		scaled = stime * res;
+		/* Make sure 'total' fits in 32 bits */
-		scaled += div64_u64(stime * rem, total);
+		if (total >> 32)
-	} else {
+			goto drop_precision;
-		/*
+
-		 * Same in reverse: scale down to total / rtime
+		/* Does rtime (and thus stime) fit in 32 bits? */
-		 * then substract that result scaled to
+		if (!(rtime >> 32))
-		 * to the remaining part.
+			break;
-		 */
+
-		res = div64_u64_rem(total, rtime, &rem);
+		/* Can we just balance rtime/stime rather than dropping bits? */
-		scaled = div64_u64(stime, res);
+		if (stime >> 31)
-		scaled -= div64_u64(scaled * rem, total);
+			goto drop_precision;
 		/* We can grow stime and shrink rtime and try to make them both fit */
 		stime <<= 1;
 		rtime >>= 1;
 		continue;
 drop_precision:
 		/* We drop from rtime, it has more bits than stime */
 		rtime >>= 1;
 		total >>= 1;
 	}
 	/*
 	 * Make sure gcc understands that this is a 32x32->64 multiply,
 	 * followed by a 64/32->64 divide.
 	 */
 	scaled = div_u64((u64) (u32) stime * (u64) (u32) rtime, (u32)total);
 	return (__force cputime_t) scaled;
 }
@ -545,7 +558,7 @@ static void cputime_adjust(struct task_cputime *curr,
 			   struct cputime *prev,
 			   cputime_t *ut, cputime_t *st)
 {
-	cputime_t rtime, stime, total;
+	cputime_t rtime, stime, utime, total;
 	if (vtime_accounting_enabled()) {
 		*ut = curr->utime;
@ -568,13 +581,21 @@ static void cputime_adjust(struct task_cputime *curr,
 	 */
 	rtime = nsecs_to_cputime(curr->sum_exec_runtime);
-	if (!rtime) {
+	/*
-		stime = 0;
+	 * Update userspace visible utime/stime values only if actual execution
-	} else if (!total) {
+	 * time is bigger than already exported. Note that it can happen that we
-		stime = rtime;
+	 * provided bigger values due to scaling inaccuracy on big numbers.
-	} else {
+	 */
 	if (prev->stime + prev->utime >= rtime)
 		goto out;
 	if (total) {
 		stime = scale_stime((__force u64)stime,
 				    (__force u64)rtime, (__force u64)total);
 		utime = rtime - stime;
 	} else {
 		stime = rtime;
 		utime = 0;
 	}
 	/*
@ -583,8 +604,9 @@ static void cputime_adjust(struct task_cputime *curr,
 	 * Let's enforce monotonicity.
 	 */
 	prev->stime = max(prev->stime, stime);
-	prev->utime = max(prev->utime, rtime - prev->stime);
+	prev->utime = max(prev->utime, utime);
 out:
 	*ut = prev->utime;
 	*st = prev->stime;
 }
--- a/lib/div64.c
+++ b/lib/div64.c
@ -79,10 +79,9 @@ EXPORT_SYMBOL(div_s64_rem);
 #endif
 /**
- * div64_u64_rem - unsigned 64bit divide with 64bit divisor and 64bit remainder
+ * div64_u64 - unsigned 64bit divide with 64bit divisor
 * @dividend:	64bit dividend
 * @divisor:	64bit divisor
 * @remainder:  64bit remainder
 *
 * This implementation is a modified version of the algorithm proposed
 * by the book 'Hacker's Delight'.  The original source and full proof
@ -90,33 +89,27 @@ EXPORT_SYMBOL(div_s64_rem);
 *
 * 'http://www.hackersdelight.org/HDcode/newCode/divDouble.c.txt'
 */
-#ifndef div64_u64_rem
+#ifndef div64_u64
-u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder)
+u64 div64_u64(u64 dividend, u64 divisor)
 {
 	u32 high = divisor >> 32;
 	u64 quot;
 	if (high == 0) {
-		u32 rem32;
+		quot = div_u64(dividend, divisor);
 		quot = div_u64_rem(dividend, divisor, &rem32);
 		*remainder = rem32;
 	} else {
 		int n = 1 + fls(high);
 		quot = div_u64(dividend >> n, divisor >> n);
 		if (quot != 0)
 			quot--;
-
+		if ((dividend - quot * divisor) >= divisor)
 		*remainder = dividend - quot * divisor;
 		if (*remainder >= divisor) {
 			quot++;
 			*remainder -= divisor;
 		}
 	}
 	return quot;
 }
-EXPORT_SYMBOL(div64_u64_rem);
+EXPORT_SYMBOL(div64_u64);
 #endif
 /**