checkpatch: look for common misspellings
Check for misspellings, based on Debian's lintian list.  Several false
positives were removed, and several additional misspellings that are
common in the kernel were added:
	backword backwords
	invalide valide
	recieves
	singed unsinged
While going back and fixing existing spelling mistakes isn't a high
priority, it'd be nice to try to catch them before they hit the tree.
In the 13830 commits between 3.15 and 3.16, the script would have noticed
560 spelling mistakes. The top 25 are shown here:
$ git log --pretty=oneline v3.15..v3.16 | wc -l
13830
$ git log --format='%H' v3.15..v3.16 | \
   while read commit ; do \
     echo "commit $commit" ; \
     git log --format=email --stat -p -1 $commit | \
       ./scripts/checkpatch.pl --types=typo_spelling --no-summary - ; \
   done | tee spell_v3.15..v3.16.txt | grep "may be misspelled" | \
   awk '{print $2}' | tr A-Z a-z | sort | uniq -c | sort -rn
     21 'seperate'
     17 'endianess'
     15 'sucess'
     13 'noticable'
     11 'occured'
     11 'accomodate'
     10 'interrup'
      9 'prefered'
      8 'unecessary'
      8 'explicitely'
      7 'supress'
      7 'overriden'
      7 'immediatly'
      7 'funtion'
      7 'defult'
      7 'childs'
      6 'succesful'
      6 'splitted'
      6 'specifc'
      6 'reseting'
      6 'recieve'
      6 'changable'
      5 'tmis'
      5 'singed'
      5 'preceeding'
Thanks to Joe Perches for rewrites, suggestions, additional misspelling
entries, and testing.
Signed-off-by: Kees Cook <keescook@chromium.org>
Acked-by: Joe Perches <joe@perches.com>
Cc: Masanari Iida <standby24x7@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
			
			
This commit is contained in:
		
							parent
							
								
									08a2843e77
								
							
						
					
					
						commit
						66b47b4a9d
					
				| @ -9,7 +9,8 @@ use strict; | ||||
| use POSIX; | ||||
| 
 | ||||
# $P is the basename of the running script and $D is the directory part of
# the path it was invoked with.
my $P = $0;
# Default to the current directory: when $0 carries no directory component
# (e.g. "perl checkpatch.pl") the substitution below does not match and $1
# would otherwise be stale or undefined.
my $D = '.';
if ($P =~ s@(.*)/@@) {
	# The greedy (.*) runs up to the last '/', so one substitution both
	# strips the directory from $P and captures it.
	$D = $1;
}
| 
 | ||||
| my $V = '0.32'; | ||||
| 
 | ||||
| @ -44,6 +45,7 @@ my $max_line_length = 80; | ||||
| my $ignore_perl_version = 0; | ||||
| my $minimum_perl_version = 5.10.0; | ||||
| my $min_conf_desc_length = 4; | ||||
| my $spelling_file = "$D/spelling.txt"; | ||||
| 
 | ||||
| sub help { | ||||
| 	my ($exitcode) = @_; | ||||
| @ -434,6 +436,29 @@ our $allowed_asm_includes = qr{(?x: | ||||
| )}; | ||||
| # memory.h: ARM has a custom one | ||||
| 
 | ||||
# Load common spelling mistakes and build regular expression list.
#
# Each useful line of $spelling_file has the form "misspelling||correction".
# Blank lines and lines whose first non-blank character is '#' are ignored.
# After this block:
#   @spelling_list - the misspellings, in file order
#   %spelling_fix  - maps each misspelling to its correction
#   $misspellings  - a regex alternation of all misspellings
my $misspellings;
my @spelling_list;
my %spelling_fix;
open(my $spelling, '<', $spelling_file)
    or die "$P: Can't open $spelling_file for reading: $!\n";
while (my $line = <$spelling>) {
	# Trim surrounding whitespace, including the trailing newline.
	$line =~ s/^\s+//;
	$line =~ s/\s+$//;

	next if ($line eq '' || $line =~ m/^#/);

	my ($suspect, $correction) = split(/\|\|/, $line);

	# Skip malformed entries (no "||" separator, or an empty side) so that
	# no misspelling is ever stored with an undefined or empty correction.
	next if (!defined($suspect) || $suspect eq '');
	next if (!defined($correction) || $correction eq '');

	push(@spelling_list, $suspect);
	$spelling_fix{$suspect} = $correction;
}
close($spelling);

# Entries are literal words, so escape any regex metacharacters.  With no
# entries at all, fall back to a pattern that can never match: an empty
# alternation would match the empty string at every position.
$misspellings = @spelling_list
    ? join("|", map { quotemeta($_) } @spelling_list)
    : '(?!)';
| 
 | ||||
| sub build_types { | ||||
| 	my $mods = "(?x:  \n" . join("|\n  ", @modifierList) . "\n)"; | ||||
| 	my $all = "(?x:  \n" . join("|\n  ", @typeList) . "\n)"; | ||||
| @ -2220,6 +2245,23 @@ sub process { | ||||
| 			    "8-bit UTF-8 used in possible commit log\n" . $herecurr); | ||||
| 		} | ||||
| 
 | ||||
# Check for various typo / spelling mistakes
# Scans commit-log lines and added ("+") lines for any word listed in
# $misspellings and reports it together with the suggested correction from
# %spelling_fix; when the report is accepted and $fix is set, the word is
# also replaced in $fixed[$fixlinenr].
		if ($in_commit_log || $line =~ /^\+/) {
			# Use lookarounds so the characters around a hit are not
			# consumed: otherwise a second misspelling that follows one
			# separator later ("teh teh") can never match, because its
			# leading delimiter was already eaten by the previous match.
			while ($rawline =~ /(?<![a-z@])($misspellings)(?![a-z@])/gi) {
				my $typo = $1;
				my $typo_fix = $spelling_fix{lc($typo)};

				# No known correction for this spelling (e.g. an empty
				# match or a key stored with different case): nothing
				# useful to report.
				next if (!defined($typo_fix));

				# Mirror the capitalisation of the word as it was written.
				$typo_fix = ucfirst($typo_fix) if ($typo =~ /^[A-Z]/);
				$typo_fix = uc($typo_fix) if ($typo =~ /^[A-Z]+$/);

				# Report through CHK instead of WARN when $file is set.
				my $msg_type = $file ? \&CHK : \&WARN;
				if ($msg_type->("TYPO_SPELLING",
						"'$typo' may be misspelled - perhaps '$typo_fix'?\n" . $herecurr) &&
				    $fix) {
					# \Q...\E: $typo is literal text from the line, not
					# a pattern.
					$fixed[$fixlinenr] =~ s/(?<![A-Za-z@])\Q$typo\E(?![A-Za-z@])/$typo_fix/;
				}
			}
		}
| 
 | ||||
| # ignore non-hunk lines and lines being removed | ||||
| 		next if (!$hunk_line || $line =~ /^-/); | ||||
| 
 | ||||
|  | ||||
							
								
								
									
										1042
									
								
								scripts/spelling.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1042
									
								
								scripts/spelling.txt
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
		Loading…
	
		Reference in New Issue
	
	Block a user