#! /usr/bin/env perl
# Copyright 2002-2023 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


require 5.10.0;
use warnings;
use strict;

use Carp qw(:DEFAULT cluck);
use Pod::Checker;
use File::Find;
use File::Basename;
use File::Spec::Functions;
use Getopt::Std;
use FindBin;
use lib "$FindBin::Bin/perl";

use OpenSSL::Util::Pod;

use lib '.';
use configdata;

# Set to 1 for debug output
my $debug = 0;

# Command line option flags; Getopt::Std fills in the ones named in the
# getopts() call below.
our ($opt_d, $opt_e, $opt_s, $opt_o, $opt_h, $opt_l, $opt_m,
     $opt_n, $opt_p, $opt_u, $opt_v, $opt_c, $opt_i);

# Write the usage summary to standard output and terminate the script.
sub help {
    print <<EOF;
Find small errors (nits) in documentation. Options:
    -c List undocumented commands, undocumented options and unimplemented options.
    -d Detailed list of undocumented (implies -u)
    -e Detailed list of new undocumented (implies -v)
    -h Print this help message
    -l Print bogus links
    -m Name(s) of manuals to focus on. Default: man1,man3,man5,man7
    -n Print nits in POD pages
    -o Causes -e/-v to count symbols added since 1.1.1 as new (implies -v)
    -i Checks for history entries available for symbols added since 3.0.0 as new
    -u Count undocumented functions
    -v Count new undocumented functions
EOF
    exit;
}

getopts('cdehlm:noiuv');

if ($opt_h) {
    help();
}

# -d implies -u, while -o and -e both imply -v.
if ($opt_d) {
    $opt_u = 1;
}
if ($opt_o || $opt_e) {
    $opt_v = 1;
}

# The "all undocumented" and "new undocumented" modes are mutually exclusive.
if ($opt_u && $opt_v) {
    die "Cannot use both -u and -v";
}
if ($opt_d && $opt_e) {
    die "Cannot use both -d and -e";
}

# We only need to check c, l, n, u and v.
# Options d, e, o imply one of the above.
die "Need one of -[cdehlnouv] flags.\n"
    unless $opt_c or $opt_l or $opt_n or $opt_u or $opt_v;


# Scratch file that receives podchecker output.
# NOTE(review): fixed, predictable name in /tmp; consider File::Temp.
my $temp = '/tmp/docnits.txt';
my $OUT;
# Set to 1 by err() as soon as any nit has been reported.
my $status = 0;

$opt_m = "man1,man3,man5,man7" unless $opt_m;
die "Argument of -m option may contain only man1, man3, man5, and/or man7"
    unless $opt_m =~ /^(man[1357][, ]?)*$/;
my @sections = ( split /[, ]/, $opt_m );

# =head1 sections every page must have; '*' applies to all man sections,
# the numeric keys add requirements for that man section.
my %mandatory_sections = (
    '*' => [ 'NAME', 'COPYRIGHT' ],
    1   => [ 'DESCRIPTION', 'SYNOPSIS', 'OPTIONS' ],
    3   => [ 'DESCRIPTION', 'SYNOPSIS', 'RETURN VALUES' ],
    5   => [ 'DESCRIPTION' ],
    7   => [ ]
);

# Symbols that we ignored.
# They are reserved macros that we currently don't document
my $ignored = qr/(?| ^i2d_
                 |   ^d2i_
                 |   ^DEPRECATEDIN
                 |   ^OSSL_DEPRECATED
                 |   \Q_fnsig(3)\E$
                 |   ^IMPLEMENT_
                 |   ^_?DECLARE_
                 |   ^sk_
                 |   ^SKM_DEFINE_STACK_OF_INTERNAL
                 |   ^lh_
                 |   ^DEFINE_LHASH_OF_(INTERNAL|DEPRECATED)
                 )/x;

# A common regexp for C symbol names
my $C_symbol = qr/\b[[:alpha:]][_[:alnum:]]*\b/;

# Collect all POD files, both internal and public, and regardless of location
# We collect them in a hash table with each file being a key, so we can attach
# tags to them.  For example, internal docs will have the word "internal"
# attached to them.
my %files = ();
# We collect files names on the fly, on known tag basis
my %collected_tags = ();
# We cache results based on tags
my %collected_results = ();

# files OPTIONS
#
# Example:
#
#       files(TAGS => 'manual');
#       files(TAGS => [ 'manual', 'man1' ]);
#
# This function returns an array of files corresponding to a set of tags
# given with the options "TAGS".  The value of this option can be a single
# word, or an array of several words, which work as inclusive or exclusive
# selectors.  Inclusive selectors are used to add one more set of files to
# the returned array, while exclusive selectors limit the set of files added
# to the array.  The recognised tag values are:
#
# 'public_manual'       - inclusive selector, adds public manuals to the
#                         returned array of files.
# 'internal_manual'     - inclusive selector, adds internal manuals to the
#                         returned array of files.
# 'manual'              - inclusive selector, adds any manual to the returned
#                         array of files.  This is really a shorthand for
#                         'public_manual' and 'internal_manual' combined.
# 'public_header'       - inclusive selector, adds public headers to the
#                         returned array of files.
# 'header'              - inclusive selector, adds any header file to the
#                         returned array of files.  Since we currently only
#                         care about public headers, this is exactly
#                         equivalent to 'public_header', but is present for
#                         consistency.
#
# 'man1', 'man3', 'man5', 'man7'
#                       - exclusive selectors, only applicable together with
#                         any of the manual selectors.  If any of these are
#                         present, only the manuals from the given sections
#                         will be included.  If none of these are present,
#                         the manuals from all sections will be returned.
#
# All returned manual files come from configdata.pm.
# All returned header files come from looking inside
# "$config{sourcedir}/include/openssl"
#
# Croaks if TAGS is missing, undefined, or a reference other than an array.
sub files {
    my %opts = ( @_ );          # Make a copy of the arguments

    # Validate before wrapping a lone word into an array; otherwise an
    # undefined TAGS would silently become [ undef ] and never be caught.
    croak "No tags given, or not an array"
        unless defined $opts{TAGS}
            && (ref($opts{TAGS}) eq '' || ref($opts{TAGS}) eq 'ARRAY');

    $opts{TAGS} = [ $opts{TAGS} ] if ref($opts{TAGS}) eq '';

    my %tags = map { $_ => 1 } @{$opts{TAGS}};
    $tags{public_manual} = 1
        if $tags{manual} && ($tags{public} // !$tags{internal});
    $tags{internal_manual} = 1
        if $tags{manual} && ($tags{internal} // !$tags{public});
    $tags{public_header} = 1
        if $tags{header} && ($tags{public} // !$tags{internal});
    delete $tags{manual};
    delete $tags{header};
    delete $tags{public};
    delete $tags{internal};

    # The normalised tag set doubles as the key of the result cache.
    my $tags_as_key = join(':', sort keys %tags);

    cluck  "DEBUG[files]: This is how we got here!" if $debug;
    print STDERR "DEBUG[files]: tags: $tags_as_key\n" if $debug;

    # Only gather file names for tags we haven't gathered before.
    my %tags_to_collect = ( map { $_ => 1 }
                            grep { !exists $collected_tags{$_} }
                            keys %tags );

    if ($tags_to_collect{public_manual}) {
        print STDERR "DEBUG[files]: collecting public manuals\n"
            if $debug;

        # The structure in configdata.pm is that $unified_info{mandocs}
        # contains lists of man files, and in turn, $unified_info{depends}
        # contains hash tables showing which POD file each of those man
        # files depend on.  We use that information to find the POD files,
        # and to attach the man section they belong to as tags
        foreach my $mansect ( @sections ) {
            foreach ( map { @{$unified_info{depends}->{$_}} }
                      @{$unified_info{mandocs}->{$mansect}} ) {
                $files{$_} = { $mansect => 1, public_manual => 1 };
            }
        }
        $collected_tags{public_manual} = 1;
    }

    if ($tags_to_collect{internal_manual}) {
        print STDERR "DEBUG[files]: collecting internal manuals\n"
            if $debug;

        # We don't have the internal docs in configdata.pm.  However, they
        # are all in the source tree, so they're easy to find.
        foreach my $mansect ( @sections ) {
            foreach ( glob(catfile($config{sourcedir},
                                   'doc', 'internal', $mansect, '*.pod')) ) {
                $files{$_} = { $mansect => 1, internal_manual => 1 };
            }
        }
        $collected_tags{internal_manual} = 1;
    }

    if ($tags_to_collect{public_header}) {
        print STDERR "DEBUG[files]: collecting public headers\n"
            if $debug;

        foreach ( glob(catfile($config{sourcedir},
                               'include', 'openssl', '*.h')) ) {
            $files{$_} = { public_header => 1 };
        }
        # Remember that the headers are collected, like the manual branches
        # do, so later calls don't glob the include directory again.
        $collected_tags{public_header} = 1;
    }

    my @result = @{$collected_results{$tags_as_key} // []};

    if (!@result) {
        # Produce a result based on caller tags
        foreach my $type ( ( 'public_manual', 'internal_manual' ) ) {
            next unless $tags{$type};

            # If caller asked for specific sections, we care about sections.
            # Otherwise, we give back all of them.
            my @selected_sections =
                grep { $tags{$_} } @sections;
            @selected_sections = @sections unless @selected_sections;

            foreach my $section ( ( @selected_sections ) ) {
                push @result,
                    ( sort { basename($a) cmp basename($b) }
                      grep { $files{$_}->{$type} && $files{$_}->{$section} }
                      keys %files );
            }
        }
        if ($tags{public_header}) {
            push @result,
                ( sort { basename($a) cmp basename($b) }
                  grep { $files{$_}->{public_header} }
                  keys %files );
        }

        if ($debug) {
            print STDERR "DEBUG[files]: result:\n";
            print STDERR "DEBUG[files]:     $_\n" foreach @result;
        }
        $collected_results{$tags_as_key} = [ @result ];
    }

    return @result;
}

# Print error message, set $status.
# The arguments are joined with single spaces and printed on standard output.
sub err {
    print join(" ", @_), "\n";
    $status = 1
}

# Cross-check functions in the NAME and SYNOPSIS section.
#
# Arguments: $id (prefix for error messages), $filename (path of the POD
# file) and $contents (the text of the POD file).  Returns early when the
# page has no NAME section directly followed by SYNOPSIS, or no SYNOPSIS
# followed by DESCRIPTION.
sub name_synopsis {
    my $id = shift;
    my $filename = shift;
    my $contents = shift;

    # Get NAME section and all words in it.
    return unless $contents =~ /=head1 NAME(.*)=head1 SYNOPSIS/ms;
    my $tmp = $1;
    $tmp =~ tr/\n/ /;
    err($id, "Trailing comma before - in NAME")
        if $tmp =~ /, *-/;
    # Drop the description that follows the dash
    $tmp =~ s/ -.*//g;
    err($id, "POD markup among the names in NAME")
        if $tmp =~ /[<>]/;
    # Squeeze runs of spaces into one.  This must require at least one
    # space; a pattern that can match the empty string would insert a
    # space between every pair of characters.
    $tmp =~ s/ +/ /g;
    err($id, "Missing comma in NAME")
        if $tmp =~ /[^,] /;

    my $dirname = dirname($filename);
    my $section = basename($dirname);
    my $simplename = basename($filename, ".pod");
    my $foundfilename = 0;
    my %foundfilenames = ();
    my %names;
    foreach my $n ( split ',', $tmp ) {
        $n =~ s/^\s+//;
        $n =~ s/\s+$//;
        err($id, "The name '$n' contains white-space")
            if $n =~ /\s/;
        $names{$n} = 1;
        $foundfilename++ if $n eq $simplename;
        # A name that is also the file name of another manual in the same
        # section is reported below.
        $foundfilenames{$n} = 1
            if ( ( grep { basename($_) eq "$n.pod" }
                   files(TAGS => [ 'manual', $section ]) )
                 && $n ne $simplename );
    }
    err($id, "The following exist as other .pod files:",
         sort keys %foundfilenames)
        if %foundfilenames;
    err($id, "$simplename (filename) missing from NAME section")
        unless $foundfilename;

    # Find all functions in SYNOPSIS
    return unless $contents =~ /=head1 SYNOPSIS(.*)=head1 DESCRIPTION/ms;
    my $syn = $1;
    my $ignore_until = undef;   # If defined, this is a regexp
    # Remove all non-code lines
    $syn =~ s/^(?:\s*?|\S.*?)$//msg;
    # Remove all comments
    $syn =~ s/\/\*.*?\*\///msg;
    while ( $syn ) {
        # "env" lines end at a newline.
        # Preprocessor lines start with a # and end at a newline.
        # Other lines end with a semicolon, and may cover more than
        # one physical line.
        if ( $syn !~ /^ \s*(env .*?|#.*?|.*?;)\s*$/ms ) {
            err($id, "Can't parse rest of synopsis:\n$syn\n(declarations not ending with a semicolon (;)?)");
            last;
        }
        my $line = $1;
        $syn = $';

        print STDERR "DEBUG[name_synopsis] \$line = '$line'\n" if $debug;

        # Special code to skip over documented structures
        if ( defined $ignore_until) {
            next if $line !~ /$ignore_until/;
            $ignore_until = undef;
            next;
        }
        if ( $line =~ /^\s*(?:typedef\s+)?struct(?:\s+\S+)\s*\{/ ) {
            $ignore_until = qr/\}.*?;/;
            next;
        }

        my $sym;
        my $is_prototype = 1;
        # Replace type-generating macro calls with a plain type, so their
        # parentheses aren't mistaken for an argument list.
        $line =~ s/LHASH_OF\([^)]+\)/int/g;
        $line =~ s/STACK_OF\([^)]+\)/int/g;
        $line =~ s/SPARSE_ARRAY_OF\([^)]+\)/int/g;
        $line =~ s/__declspec\([^)]+\)//;

        ## We don't prohibit that space, to allow typedefs looking like
        ## this:
        ##
        ## typedef int (fantastically_long_name_breaks_80char_limit)
        ##     (fantastically_long_name_breaks_80char_limit *something);
        ##
        #if ( $line =~ /typedef.*\(\*?\S+\)\s+\(/ ) {
        #    # a callback function with whitespace before the argument list:
        #    # typedef ... (*NAME) (...
        #    # typedef ... (NAME) (...
        #    err($id, "Function typedef has space before arg list: $line");
        #}

        if ( $line =~ /env (\S*)=/ ) {
            # environment variable env NAME=...
            $sym = $1;
        } elsif ( $line =~ /typedef.*\(\*?($C_symbol)\)\s*\(/ ) {
            # a callback function pointer: typedef ... (*NAME)(...
            # a callback function signature: typedef ... (NAME)(...
            $sym = $1;
        } elsif ( $line =~ /typedef.*($C_symbol)\s*\(/ ) {
            # a callback function signature: typedef ... NAME(...
            $sym = $1;
        } elsif ( $line =~ /typedef.*($C_symbol);/ ) {
            # a simple typedef: typedef ... NAME;
            $is_prototype = 0;
            $sym = $1;
        } elsif ( $line =~ /enum ($C_symbol) \{/ ) {
            # an enumeration: enum ... {
            $sym = $1;
        } elsif ( $line =~ /#\s*(?:define|undef) ($C_symbol)/ ) {
            # a macro definition or removal
            $is_prototype = 0;
            $sym = $1;
        } elsif ( $line =~ /^[^\(]*?\(\*($C_symbol)\s*\(/ ) {
            # a function returning a function pointer: TYPE (*NAME(args))(args)
            $sym = $1;
        } elsif ( $line =~ /^[^\(]*?($C_symbol)\s*\(/ ) {
            # a simple function declaration
            $sym = $1;
        }
        else {
            next;
        }

        print STDERR "DEBUG[name_synopsis] \$sym = '$sym'\n" if $debug;

        err($id, "$sym missing from NAME section")
            unless defined $names{$sym};
        # The value 2 marks the name as seen in SYNOPSIS
        $names{$sym} = 2;

        # Do some sanity checks on the prototype.
        err($id, "Prototype missing spaces around commas: $line")
            if $is_prototype && $line =~ /[a-z0-9],[^\s]/;
    }

    foreach my $n ( keys %names ) {
        next if $names{$n} == 2;
        err($id, "$n missing from SYNOPSIS")
    }
}

# Check if SECTION ($3) is located before BEFORE ($4)
# Nothing is reported unless both sections are present in $contents ($2).
sub check_section_location {
    my $id = shift;
    my $contents = shift;
    my $section = shift;
    my $before = shift;

    # The names are quoted so they are always matched as literal text.
    return unless $contents =~ /=head1 \Q$section\E/
        and $contents =~ /=head1 \Q$before\E/;
    err($id, "$section should appear before $before section")
        if $contents =~ /=head1 \Q$before\E.*=head1 \Q$section\E/ms;
}

# Check if HISTORY section is present and functionname ($2) is present in it
# or a generic "(f)unction* added" term hints at several new functions in
# the documentation (yes, this is an approximation only but it works :)
# Returns 1 if the function is considered covered, 0 otherwise.
sub find_functionname_in_history_section {
    my $contents = shift;
    my $functionname = shift;
    my (undef, $rest) = split(/=head1 HISTORY\s*/, $contents);

    # No HISTORY section is a clear error now
    return 0 if not $rest;

    # Only look at the text up to the COPYRIGHT section
    my ($histsect, undef) = split(/=head1 COPYRIGHT\s*/, $rest);
    if (index($histsect, $functionname) == -1) {
        # OK, functionname is not in HISTORY section...
        # last try: Check for presence of "*unction*added*"
        return 0 if $histsect !~ /unction.*added.*/;
    }
    return 1;
}

# Check if a =head1 is duplicated, or a =headX is duplicated within a
# =head1.  Treats =head2 =head3 as equivalent -- it doesn't reset the head3
# sets if it finds a =head2 -- but that is good enough for now. Also check
# for proper capitalization, trailing periods, etc.
sub check_head_style {
    my $id = shift;
    my $contents = shift;
    my %head1;
    my %subheads;

    foreach my $line ( split /\n+/, $contents ) {
        next unless $line =~ /^=head/;
        if ( $line =~ /head1/ ) {
            err($id, "Duplicate section $line")
                if defined $head1{$line};
            $head1{$line} = 1;
            # Subsection names only need to be unique within one =head1
            %subheads = ();
        } else {
            err($id, "Duplicate subsection $line")
                if defined $subheads{$line};
            $subheads{$line} = 1;
        }
        err($id, "Period in =head")
            if $line =~ /\.[^\w]/ or $line =~ /\.$/;
        err($id, "not all uppercase in =head1")
            if $line =~ /head1.*[a-z]/;
        err($id, "All uppercase in subhead")
            if $line =~ /head[234][ A-Z0-9]+$/;
    }
}

# Because we have options and symbols with extra markup, we need
# to take that into account, so we need a regexp that extracts
# markup chunks, including recursive markup.
# please read up on /(?R)/ in perlre(1)
# (note: order is important, (?R) needs to come before .)
# (note: non-greedy is important, or something like 'B<foo> and B<bar>'
# will be captured as one item)
my $markup_re =
    qr/(                        # Capture group
           [BIL]<               # The start of what we recurse on
           (?:(?-1)|.)*?        # recurse the whole regexp (referring to
                                # the last opened capture group, i.e. the
                                # start of this regexp), or pick next
                                # character.  Do NOT be greedy!
           >                    # The end of what we recurse on
       )/x;                     # (the x allows this sort of split up regexp)

# Options must start with a dash, followed by a letter, possibly
# followed by letters, digits, dashes and underscores, and the last
# character must be a letter or a digit.
# We do also accept the single -? or -n, where n is a digit
my $option_re =
    qr/(?:
            \?                  # Single question mark
            |
            \d                  # Single digit
            |
            -                   # Single dash (--)
            |
            [[:alpha:]](?:[-_[:alnum:]]*?[[:alnum:]])?
       )/x;

# Helper function to check if a given $thing is properly marked up
# option.  It returns one of these values:
#     undef         if it's not an option
#     ""            if it's a malformed option
#     $unwrapped    the option with the outermost B<> wrapping removed.
# The first two arguments ($id and $filename) are accepted but not used.
sub normalise_option {
    my $id = shift;
    my $filename = shift;
    my $thing = shift;

    my $unwrapped = $thing;
    my $unmarked = $thing;

    # $unwrapped is the option with the outer B<> markup removed
    $unwrapped =~ s/^B<//;
    $unwrapped =~ s/>$//;
    # $unmarked is the option with *all* markup removed
    $unmarked =~ s/[BIL]<|>//msg;


    # If we found an option, check it, collect it
    if ( $unwrapped =~ /^\s*-/ ) {
        return $unwrapped       # return option with outer B<> removed
            if $unmarked =~ /^-${option_re}$/;
        return "";              # Malformed option
    }
    return undef;               # Something else
}

# Checks of command option (man1) formatting.  The man1 checks are
# restricted to the SYNOPSIS and OPTIONS sections, the rest is too
# free form, we simply cannot be too strict there.

# Arguments: $id (prefix for error messages), $filename (path of the POD
# file) and $contents (the text of the POD file).  Every B<-option> found in
# SYNOPSIS must show up in an =item of an OPTIONS section and vice versa;
# problems are reported through err().
sub option_check {
    my $id = shift;
    my $filename = shift;
    my $contents = shift;

    # Capture in list context, so a page without a SYNOPSIS section gives
    # an empty string rather than whatever an earlier match left in $1.
    my ($synopsis) = $contents =~ /=head1\s+SYNOPSIS(.*?)=head1/s;
    $synopsis //= '';

    # Some pages have more than one OPTIONS section, let's make sure
    # to get them all
    my $options = '';
    while ( $contents =~ /=head1\s+[A-Z ]*?OPTIONS$(.*?)(?==head1)/msg ) {
        $options .= $1;
    }

    # Look for options with no or incorrect markup
    while ( $synopsis =~
            /(?<![-<[:alnum:]])-(?:$markup_re|.)*(?![->[:alnum:]])/msg ) {
        err($id, "Malformed option [1] in SYNOPSIS: $&");
    }

    # @synopsis keeps the B<-...> chunks in order of appearance, for
    # reporting; %in_synopsis gives constant time membership tests.
    my @synopsis;
    my %in_synopsis;
    while ( $synopsis =~ /$markup_re/msg ) {
        my $found = $&;
        if ( $found =~ /^B<-/ ) {
            push @synopsis, $found;
            $in_synopsis{$found} = 1;
        }
        print STDERR "$id:DEBUG[option_check] SYNOPSIS: found $found\n"
            if $debug;
        my $option_uw = normalise_option($id, $filename, $found);
        err($id, "Malformed option [2] in SYNOPSIS: $found")
            if defined $option_uw && $option_uw eq '';
    }

    # In OPTIONS, we look for =item paragraphs.
    # (?=^\s*$) detects an empty line.
    my %in_options;
    while ( $options =~ /=item\s+(.*?)(?=^\s*$)/msg ) {
        my $item = $&;

        while ( $item =~ /(\[\s*)?($markup_re)/msg ) {
            my $found = $2;
            print STDERR "$id:DEBUG[option_check] OPTIONS: found $&\n"
                if $debug;
            err($id, "Unexpected bracket in OPTIONS =item: $item")
                if ($1 // '') ne '' && $found =~ /^B<\s*-/;

            my $option_uw = normalise_option($id, $filename, $found);
            err($id, "Malformed option in OPTIONS: $found")
                if defined $option_uw && $option_uw eq '';
            if ($found =~ /^B<-/) {
                $in_options{$found} = 1;
                # openssl.pod and the *-options.pod pages are exempt from
                # having their options repeated in SYNOPSIS.
                err($id, "OPTIONS entry $found missing from SYNOPSIS")
                    unless $in_synopsis{$found}
                         || $id =~ /(openssl|-options)\.pod:1:$/;
            }
        }
    }
    foreach my $option (@synopsis) {
        err($id, "SYNOPSIS entry $option missing from OPTIONS")
            unless $in_options{$option};
    }
}

# Normal symbol form
my $symbol_re = qr/[[:alpha:]_][_[:alnum:]]*?/;

# Checks of function name (man3) formatting.  The man3 checks are
# easier than the man1 checks, we only check the names followed by (),
# and only the names that have POD markup.
# Arguments: $id (prefix for error messages), $filename (path of the POD
# file, not used here) and $contents (the text of the POD file).
sub functionname_check {
    my $id = shift;
    my $filename = shift;
    my $contents = shift;

    while ( $contents =~ /($markup_re)\(\)/msg ) {
        print STDERR "$id:DEBUG[functionname_check] SYNOPSIS: found $&\n"
            if $debug;

        my $symbol = $1;
        my $unmarked = $symbol;
        # Strip all markup to get at the bare name
        $unmarked =~ s/[BIL]<|>//msg;

        err($id, "Malformed symbol: $symbol")
            unless $symbol =~ /^B<.*?>$/ && $unmarked =~ /^${symbol_re}$/
    }

    # We can't do the kind of collecting coolness that option_check()
    # does, because there are too many things that can't be found in
    # name repositories like the NAME sections, such as symbol names
    # with a variable part (typically marked up as B<foo_I<TYPE>_bar>
}

# This is from http://man7.org/linux/man-pages/man7/man-pages.7.html
my %preferred_words = (
    '16bit'         => '16-bit',
    'a.k.a.'        => 'aka',
    'bitmask'       => 'bit mask',
    'builtin'       => 'built-in',
   #'epoch'         => 'Epoch', # handled specially, below
    'fall-back'     => 'fallback',
    'file name'     => 'filename',
    'file system'   => 'filesystem',
    'host name'     => 'hostname',
    'i-node'        => 'inode',
    'lower case'    => 'lowercase',
    'lower-case'    => 'lowercase',
    'manpage'       => 'man page',
    'non-blocking'  => 'nonblocking',
    'non-default'   => 'nondefault',
    'non-empty'     => 'nonempty',
    'non-negative'  => 'nonnegative',
    'non-zero'      => 'nonzero',
    'path name'     => 'pathname',
    'pre-allocated' => 'preallocated',
    'pseudo-terminal' => 'pseudoterminal',
    'real time'     => 'real-time',
    'realtime'      => 'real-time',
    'reserved port' => 'privileged port',
    'runtime'       => 'run time',
    'saved group ID'=> 'saved set-group-ID',
    'saved set-GID' => 'saved set-group-ID',
    'saved set-UID' => 'saved set-user-ID',
    'saved user ID' => 'saved set-user-ID',
    'set-GID'       => 'set-group-ID',
    'set-UID'       => 'set-user-ID',
    'setgid'        => 'set-group-ID',
    'setuid'        => 'set-user-ID',
    'sub-system'    => 'subsystem',
    'super block'   => 'superblock',
    'super-block'   => 'superblock',
    'super user'    => 'superuser',
    'super-user'    => 'superuser',
    'system port'   => 'privileged port',
    'time stamp'    => 'timestamp',
    'time zone'     => 'timezone',
    'upper case'    => 'uppercase',
    'upper-case'    => 'uppercase',
    'useable'       => 'usable',
    'user name'     => 'username',
    'userspace'     => 'user space',
    'zeroes'        => 'zeros'
);

# Search manpage for words that have a different preferred use.
# Arguments: $id (prefix for error messages, also used to recognise man1
# pages) and $contents (the text of the POD file).
sub wording {
    my $id = shift;
    my $contents = shift;

    foreach my $k ( keys %preferred_words ) {
        # Sigh, trademark
        next if $k eq 'file system'
            and $contents =~ /Microsoft Encrypted File System/;
        err($id, "Found '$k' should use '$preferred_words{$k}'")
            if $contents =~ /\b\Q$k\E\b/i;
    }
    # Case sensitive on purpose: only the lowercase form is a nit
    err($id, "Found 'epoch' should use 'Epoch'")
        if $contents =~ /\bepoch\b/;
    if ( $id =~ m@man1/@ ) {
        # Both checks are anchored on the NAME heading, so the word is only
        # looked for between that heading and SYNOPSIS.
        err($id, "found 'tool' in NAME, should use 'command'")
            if $contents =~ /=head1 NAME.*\btool\b.*=head1 SYNOPSIS/s;
        err($id, "found 'utility' in NAME, should use 'command'")
            if $contents =~ /=head1 NAME.*\butility\b.*=head1 SYNOPSIS/s;

    }
}

# Perform all sorts of nit/error checks on a manpage
#
# Takes a hash of POD information; the keys used here are 'filename' (path
# of the POD file) and 'contents' (its text).  All findings are reported
# through err().
sub check {
    my %podinfo = @_;
    my $filename = $podinfo{filename};
    my $dirname = basename(dirname($filename));
    my $contents = $podinfo{contents};

    # Find what section this page is in; presume 3.
    my $mansect = 3;
    $mansect = $1 if $filename =~ /man([1-9])/;

    my $id = "${filename}:1:";
    check_head_style($id, $contents);

    # Check ordering of some sections in man3
    if ( $mansect == 3 ) {
        check_section_location($id, $contents, "RETURN VALUES", "EXAMPLES");
        check_section_location($id, $contents, "SEE ALSO", "HISTORY");
        check_section_location($id, $contents, "EXAMPLES", "SEE ALSO");
    }

    # Make sure every link has a man section number.
    while ( $contents =~ /$markup_re/msg ) {
        my $target = $1;
        next unless $target =~ /^L<(.*)>$/;     # Skip if not L<...>
        $target = $1;                           # Peel away L< and >
        $target =~ s/\/[^\/]*$//;               # Peel away possible anchor
        $target =~ s/.*\|//g;                   # Peel away possible link text
        next if $target eq '';                  # Skip if links within page, or
        next if $target =~ /::/;                #   links to a Perl module, or
        next if $target =~ /^https?:/;          #   is a URL link, or
        next if $target =~ /\([1357]\)$/;       #   it has a section
        err($id, "Missing man section number (likely, $mansect) in L<$target>")
    }
    # Check for proper links to commands.
    while ( $contents =~ /L<([^>]*)\(1\)(?:\/.*)?>/g ) {
        my $target = $1;
        next if $target =~ /openssl-?/;
        next if ( grep { basename($_) eq "$target.pod" }
                  files(TAGS => [ 'manual', 'man1' ]) );
        # A few well known non-OpenSSL commands are fine to link to
        next if $target =~ /ps|apropos|sha1sum|procmail|perl/;
        err($id, "Bad command link L<$target(1)>") if grep /man1/, @sections;
    }
    # Check for proper in-man-3 API links.
    while ( $contents =~ /L<([^>]*)\(3\)(?:\/.*)?>/g ) {
        my $target = $1;
        err($id, "Bad L<$target>")
            unless $target =~ /^[_[:alpha:]][_[:alnum:]]*$/
    }

    # Pages marked generic are exempt from the section specific checks
    unless ( $contents =~ /^=for openssl generic/ms ) {
        if ( $mansect == 3 ) {
            name_synopsis($id, $filename, $contents);
            functionname_check($id, $filename, $contents);
        } elsif ( $mansect == 1 ) {
            option_check($id, $filename, $contents)
        }
    }

    wording($id, $contents);

    err($id, "Doesn't start with =pod")
        if $contents !~ /^=pod/;
    err($id, "Doesn't end with =cut")
        if $contents !~ /=cut\n$/;
    err($id, "More than one cut line.")
        if $contents =~ /=cut.*=cut/ms;
    err($id, "EXAMPLE not EXAMPLES section.")
        if $contents =~ /=head1 EXAMPLE[^S]/;
    err($id, "WARNING not WARNINGS section.")
        if $contents =~ /=head1 WARNING[^S]/;
    err($id, "Missing copyright")
        if $contents !~ /Copyright .* The OpenSSL Project Authors/;
    err($id, "Copyright not last")
        if $contents =~ /head1 COPYRIGHT.*=head/ms;
    err($id, "head2 in All uppercase")
        if $contents =~ /head2\s+[A-Z ]+\n/;
    err($id, "Extra space after head")
        if $contents =~ /=head\d\s\s+/;
    err($id, "Period in NAME section")
        if $contents =~ /=head1 NAME.*\.\n.*=head1 SYNOPSIS/ms;
    err($id, "Duplicate $1 in L<>")
        if $contents =~ /L<([^>]*)\|([^>]*)>/ && $1 eq $2;
    err($id, "Bad =over $1")
        if $contents =~ /=over([^ ][^24])/;
    err($id, "Possible version style issue")
        if $contents =~ /OpenSSL version [019]/;

    if ( $contents !~ /=for openssl multiple includes/ ) {
        # Look for multiple consecutive openssl #include lines
        # (non-consecutive lines are okay; see man3/MD5.pod).
        if ( $contents =~ /=head1 SYNOPSIS(.*)=head1 DESCRIPTION/ms ) {
            my $count = 0;
            foreach my $line ( split /\n+/, $1 ) {
                if ( $line =~ m@include <openssl/@ ) {
                    err($id, "Has multiple includes")
                        if ++$count == 2;
                } else {
                    $count = 0;
                }
            }
        }
    }

    # Run podchecker with its report going to the scratch file, then echo
    # that report, leaving out the lines we don't care about.
    open my $OUT, '>', $temp
        or die "Can't open $temp, $!";
    err($id, "POD errors")
        if podchecker($filename, $OUT) != 0;
    close $OUT;
    open $OUT, '<', $temp
        or die "Can't read $temp, $!";
    while ( <$OUT> ) {
        next if /\(section\) in.*deprecated/;
        print;
    }
    close $OUT;
    # 'or' rather than '||': the latter binds to $temp, which is always
    # true, so a failed unlink would never be reported.
    unlink $temp or warn "Can't remove $temp, $!";

    # Find what section this page is in; presume 3.
    my $section = 3;
    $section = $1 if $dirname =~ /man([1-9])/;

    foreach ( (@{$mandatory_sections{'*'}}, @{$mandatory_sections{$section}}) ) {
        err($id, "Missing $_ head1 section")
            if $contents !~ /^=head1\s+${_}\s*$/m;
    }
}

# Information database ###############################################

# Map of links in each POD file; filename => [ "foo(1)", "bar(3)", ... ]
my %link_map = ();
# Map of names in each POD file or from "missing" files; possible values are:
# If found in a POD files, "name(s)" => filename
# If found in a "missing" file or external, "name(s)" => ''
my %name_map = ();

# State of man-page names.
# %state is affected by loading util/*.num and util/*.syms
# Values may be one of:
# 'crypto' : belongs in libcrypto (loaded from libcrypto.num)
# 'ssl' : belongs in libssl (loaded from libssl.num)
# 'other' : belongs in libcrypto or libssl (loaded from other.syms)
# 'internal' : Internal
# 'public' : Public (generic name or external documentation)
# Any of these values except 'public' may be prefixed with 'missing_'
# to indicate that they are known to be missing.
my %state;
# history contains the same as state above for entries with version info != 3_0_0
my %history;
# %missing is affected by loading util/missing*.txt.  Values may be one of:
# 'crypto' : belongs in libcrypto (loaded from libcrypto.num)
# 'ssl' : belongs in libssl (loaded from libssl.num)
# 'other' : belongs in libcrypto or libssl (loaded from other.syms)
# 'internal' : Internal
my %missing;

# Parse libcrypto.num, etc., and record what's there in %state, and for
# the types 'crypto' and 'ssl' also in %history.
#
# Arguments: $file (path relative to the source directory) and, optionally,
# $type (the value to record; 'internal' if not given).
# Dies if the file can't be opened or a line doesn't have 2 or 4 fields.
sub loadnum ($;$) {
    my $file = shift;
    my $type = shift;

    open my $IN, '<', catfile($config{sourcedir}, $file)
        or die "Can't open $file, $!, stopped";

    while ( <$IN> ) {
        next if /^#/;
        next if /\bNOEXIST\b/;
        my @fields = split();
        # Validate the line before any of its fields are used
        die "Malformed line $. in $file: $_"
            if scalar @fields != 2 && scalar @fields != 4;
        if ($type && ($type eq "crypto" || $type eq "ssl")) {
            # 3rd field is version; it's absent on lines with 2 fields
            my $version = $fields[2] // '';
            $history{$fields[0].'(3)'} = $type.$version
                if $version ne "3_0_0";
        }
        $state{$fields[0].'(3)'} = $type // 'internal';
    }
    close $IN;
}

# Load file of symbol names that we know aren't documented.
#
# Arguments: $missingfile (path relative to the source directory) and,
# optionally, $type (the value to record in %missing; 'internal' if not
# given).  Dies if the file can't be opened.
sub loadmissing($;$)
{
    my $missingfile = shift;
    my $type = shift;

    open my $fh, '<', catfile($config{sourcedir}, $missingfile)
        or die "Can't open $missingfile, $!";
    while ( <$fh> ) {
        chomp;
        next if /^#/;
        $missing{$_} = $type // 'internal';
    }
    close $fh;
}

# Check that we have consistent public / internal documentation and declaration
sub checkstate () {
    # Collect all known names, no matter where they come from
    my %names = map { $_ => 1 } (keys %name_map, keys %state, keys %missing);

    # Check section 3, i.e. functions and macros
    foreach ( grep { $_ =~ /\(3\)$/ } sort keys %names ) {
        # Names without a POD file of ours, and ignored names, are skipped
        next if ( $name_map{$_} // '') eq '' || $_ =~ /$ignored/;

        # If a man-page isn't recorded public or if it's recorded missing
        # and internal, it's declared to be internal.
        my $declared_internal =
            ($state{$_} // 'internal') eq 'internal'
            || ($missing{$_} // '') eq 'internal';
        # If a man-page isn't recorded internal or if it's recorded missing
        # and not internal, it's declared to be public
        my $declared_public =
            ($state{$_} // 'internal') ne 'internal'
            || ($missing{$_} // 'internal') ne 'internal';

        err("$_ is supposedly public but is documented as internal")
            if ( $declared_public && $name_map{$_} =~ /\/internal\// );
        err("$_ is supposedly internal (maybe missing from other.syms) but is documented as public")
            if ( $declared_internal && $name_map{$_} !~ /\/internal\// );
    }
}

# Check for undocumented macros; ignore those in the "missing" file
# and do simple check for #define in our header files.
sub checkmacros {
    my $count = 0;
    my %seen;

    foreach my $f ( files(TAGS => 'public_header') ) {
        # Skip some internals we don't want to document yet.
        my $base = basename($f);
        next if $base eq 'asn1.h';
        next if $base eq 'asn1t.h';
        next if $base eq 'err.h';
        open my $in, '<', $f
            or die "Can't open $f, $!";
        while ( <$in> ) {
            # Only function-like macros are of interest
            next unless /^#\s*define\s*(\S+)\(/;
            my $macro = "$1(3)"; # We know they're all in section 3
            next if defined $name_map{$macro}
                || defined $missing{$macro}
                || defined $seen{$macro}
                || $macro =~ /$ignored/;

            err("$f:", "macro $macro undocumented")
                if $opt_d || $opt_e;
            $count++;
            $seen{$macro} = 1;
        }
        close $in;
    }
    err("# $count macros undocumented (count is approximate)")
        if $count > 0;
}

# Find out what is undocumented (filtering out the known missing ones)
# and display them.
sub printem ($) {
    my ($libtype) = @_;
    my $undocumented = 0;

    for my $symbol ( sort keys %state ) {
        # Only look at the symbols recorded for the requested library
        next unless $state{$symbol} eq $libtype;

        if ( $opt_i && defined $history{$symbol} ) {
            err("$libtype:", "function $symbol not in any history section");
        }

        # Documented, or known to be missing: nothing more to report
        next if defined $name_map{$symbol};
        next if defined $missing{$symbol};

        if ( $opt_d || $opt_e ) {
            err("$libtype:", "function $symbol undocumented");
        }
        $undocumented++;
    }

    if ( $undocumented > 0 ) {
        err("# $undocumented lib$libtype names are not documented");
    }
}

# Record every name a manpage declares, and every link it contains.
# Takes the pod info as a flat key/value list; the keys read here are
# "filename", "contents" and "names".  Fills in %name_map, %state,
# %history and %link_map.
sub collectnames {
    my %podinfo = @_;
    my $filename = $podinfo{filename};
    # The manual section is the digit of the manN/ directory in the path
    $filename =~ m|man(\d)/|;
    my $section = $1;
    my $simplename = basename($filename, ".pod");
    my $id = "${filename}:1:";
    my $is_generic = $podinfo{contents} =~ /^=for openssl generic/ms;

    # The file's own base name must be one of the documented names
    my $listed = grep { $_ eq $simplename } @{$podinfo{names}};
    if ( !$listed ) {
        err($id, "$simplename not in NAME section");
        push @{$podinfo{names}}, $simplename;
    }

    for my $name ( @{$podinfo{names}} ) {
        next if $name eq "";
        if ( $name =~ /\s/ ) {
            err($id, "'$name' contains whitespace");
        }

        my $name_sec = "$name($section)";
        if ( defined $name_map{$name_sec} ) {
            # Already claimed, either by this very file or by another one
            if ( $filename eq $name_map{$name_sec} ) {
                err($id, "$name_sec duplicated in NAME section of",
                    $name_map{$name_sec});
            } elsif ( $name_map{$name_sec} ne '' ) {
                err($id, "$name_sec also in NAME section of",
                    $name_map{$name_sec});
            }
            next;
        }

        $name_map{$name_sec} = $filename;
        if ( $history{$name_sec} ) {
            (my $funcname = $name_sec) =~ s/\(.*//;
            my $contents = $podinfo{contents};
            # mark this function as found/no longer of interest
            $history{$name_sec} = undef
                if find_functionname_in_history_section($contents, $funcname);
        }
        if ( $is_generic ) {
            $state{$name_sec} //=
                ( $filename =~ /\/internal\// ? 'internal' : 'public' );
        }
    }

    if ( $podinfo{contents} =~ /=for openssl foreign manual (.*)\n/ ) {
        for my $foreign ( split / /, $1 ) {
            $name_map{$foreign} = ''; # It still exists!
            $state{$foreign} = 'public'; # We assume!
        }
    }

    # Don't use this regexp directly on $podinfo{contents}, as it causes
    # a regexp recursion, which fails on really big PODs.  Instead, use
    # $markup_re to pick up general markup, and use this regexp to check
    # that the markup that was found is indeed a link.
    my $linkre = qr/L<
                    # if the link is of the form L<something|name(s)>,
                    # then remove 'something'.  Note that 'something'
                    # may contain POD codes as well...
                    (?:(?:[^\|]|<[^>]*>)*\|)?
                    # we're only interested in references that have
                    # a one digit section number
                    ([^\/>\(]+\(\d\))
                   /x;
    my @found_links;
    while ( $podinfo{contents} =~ /$markup_re/msg ) {
        my $markup = $1;

        push @found_links, $1
            if $markup =~ $linkre;
    }
    $link_map{$filename} = \@found_links;
}

# Look for L<> ("link") references that point to files that do not exist.
sub checklinks {
    foreach my $filename ( sort keys %link_map ) {
        foreach my $link ( @{$link_map{$filename}} ) {
            # The target must be either documented or listed as missing
            err("${filename}:1:", "reference to non-existing $link")
                unless defined $name_map{$link} || defined $missing{$link};
            # Files outside an internal/ directory must not link to
            # anything recorded as internal
            err("${filename}:1:", "reference of internal $link in public documentation $filename")
                if ( ( ($state{$link} // '') eq 'internal'
                       || ($missing{$link} // '') eq 'internal' )
                     && $filename !~ /\/internal\// );
        }
    }
}

# Cipher/digests to skip if they show up as "not implemented"
# because they are, via the "-*" construct.
my %skips = (
    'aes128' => 1,
    'aes192' => 1,
    'aes256' => 1,
    'aria128' => 1,
    'aria192' => 1,
    'aria256' => 1,
    'camellia128' => 1,
    'camellia192' => 1,
    'camellia256' => 1,
    'des' => 1,
    'des3' => 1,
    'idea' => 1,
    'cipher' => 1,
    'digest' => 1,
);

my %genopts; # generic options parsed from apps/include/opt.h

# Check the flags of a command and see if everything is in the manpage
#
#   $cmd : command name; its option table is read from apps/$cmd.c
#   $doc : path of the command's POD file
#
# Reports options that are implemented but not in the synopsis, options
# in the synopsis that are not implemented, and options lacking a help
# string.  Dies if either file cannot be opened.
sub checkflags {
    my $cmd = shift;
    my $doc = shift;
    my @cmdopts;
    my %docopts;

    # Get the list of options in the command source file.
    my $active = 0;             # true while inside the OPTIONS table
    my $expect_helpstr = "";    # option still waiting for its help string
    open my $src_fh, '<', "apps/$cmd.c"
        or die "Can't open apps/$cmd.c to list options for $cmd, $!";
    while ( <$src_fh> ) {
        chomp;
        if ($active) {
            last if m/^\s*};/;
            if ($expect_helpstr ne "") {
                next if m/^\s*#\s*if/;
                err("$cmd does not implement help for -$expect_helpstr") unless m/^\s*"/;
                $expect_helpstr = "";
            }
            if (m/\{\s*"([^"]+)"\s*,\s*OPT_[A-Z0-9_]+\s*,\s*('[-\/:<>cAEfFlMnNpsuU]'|0)(.*)$/
                    && !($cmd eq "s_client" && $1 eq "wdebug")) {
                my ($optname, $rest) = ($1, $3);
                push @cmdopts, $optname;
                # The help string is expected on the next line unless it
                # already follows on this one.
                $expect_helpstr = $rest =~ m/^\s*,\s*"/ ? "" : $optname;
            } elsif (m/[\s,](OPT_[A-Z]+_OPTIONS?)\s*(,|$)/) {
                push @cmdopts, @{ $genopts{$1} };
            }
        } elsif (m/^const\s+OPTIONS\s*/) {
            $active = 1;
        }
    }
    close $src_fh;

    # Get the list of flags from the synopsis
    open my $doc_fh, '<', $doc
        or die "Can't open $doc, $!";
    while ( <$doc_fh> ) {
        chomp;
        last if /DESCRIPTION/;
        my $opt;
        if ( /\[B<-([^ >]+)/ ) {
            $opt = $1;
        } elsif ( /^B<-([^ >]+)/ ) {
            $opt = $1;
        } else {
            next;
        }
        $opt = $1 if $opt =~ /I<(.*)/;
        $docopts{$opt} = 1;
    }
    close $doc_fh;

    # See what's in the command not the manpage.
    my @undocced = grep { !defined $docopts{$_} } @cmdopts;
    foreach ( sort @undocced ) {
        err("$doc: undocumented $cmd option -$_");
    }

    # See what's in the manpage not the command.
    my %implemented = map { $_ => 1 } @cmdopts;
    my @unimpl = grep { !$implemented{$_} } keys %docopts;
    foreach ( sort @unimpl ) {
        next if $_ eq "-";              # Skip the -- end-of-flags marker
        next if defined $skips{$_};
        err("$doc: $cmd does not implement -$_");
    }
}

##
## MAIN()
## Do the work requested by the various getopt flags.
## The flags are parsed in alphabetical order, just because we have
## to have *some way* of listing them.
##

if ( $opt_c ) {
    my @commands = ();

    # Get the lists of generic options.
    # Each "#define OPT_..._OPTIONS \" line starts a group, which lasts
    # until the next blank line.
    my $active = "";
    open my $ofh, '<', catfile($config{sourcedir}, "apps/include/opt.h")
        or die "Can't open apps/include/opt.h to list generic options, $!";
    while ( <$ofh> ) {
        chomp;
        push @{ $genopts{$active} }, $1 if $active ne "" && m/^\s+\{\s*"([^"]+)"\s*,\s*OPT_/;
        $active = $1 if m/^\s*#\s*define\s+(OPT_[A-Z]+_OPTIONS?)\s*\\\s*$/;
        $active = "" if m/^\s*$/;
    }
    close $ofh;

    # Get list of commands.
    opendir(my $dh, "apps")
        or die "Can't open directory apps to list commands, $!";
    @commands = grep(/\.c$/, readdir($dh));
    closedir($dh);

    # See if each has a manpage.
    foreach my $cmd ( @commands ) {
        $cmd =~ s/\.c$//;
        next if $cmd eq 'progs' || $cmd eq 'vms_decc_init';
        my @doc = ( grep { basename($_) eq "openssl-$cmd.pod"
                           # For "tsget" and "CA.pl" pod pages
                           || basename($_) eq "$cmd.pod" }
                    files(TAGS => [ 'manual', 'man1' ]) );
        my $num = scalar @doc;
        if ($num > 1) {
            err("$num manuals for 'openssl $cmd': ".join(", ", @doc));
        } elsif ($num < 1) {
            err("no manual for 'openssl $cmd'");
        } else {
            checkflags($cmd, @doc);
        }
    }
}

# Populate %state
loadnum('util/libcrypto.num', 'crypto');
loadnum('util/libssl.num', 'ssl');
loadnum('util/other.syms', 'other');
loadnum('util/other-internal.syms');
# Populate %missing; with -u alone, no "known missing" list is loaded
if ( $opt_o ) {
    loadmissing('util/missingmacro111.txt', 'crypto');
    loadmissing('util/missingcrypto111.txt', 'crypto');
    loadmissing('util/missingssl111.txt', 'ssl');
} elsif ( !$opt_u ) {
    loadmissing('util/missingmacro.txt', 'crypto');
    loadmissing('util/missingcrypto.txt', 'crypto');
    loadmissing('util/missingssl.txt', 'ssl');
    loadmissing('util/missingcrypto-internal.txt');
    loadmissing('util/missingssl-internal.txt');
}

if ( $opt_n || $opt_l || $opt_u || $opt_v ) {
    # Files named on the command line are only honoured together with -n
    my @files_to_read = ( $opt_n && @ARGV ) ? @ARGV : files(TAGS => 'manual');

    foreach (@files_to_read) {
        my %podinfo = extract_pod_info($_, { debug => $debug });

        collectnames(%podinfo)
            if ( $opt_l || $opt_u || $opt_v );

        check(%podinfo)
            if ( $opt_n );
    }
}

if ( $opt_l ) {
    checklinks();
}

if ( $opt_n ) {
    # If not given args, check that all man1 commands are named properly.
    if ( scalar @ARGV == 0 && grep /man1/, @sections ) {
        foreach ( files(TAGS => [ 'public_manual', 'man1' ]) ) {
            next if /openssl\.pod/
                || /CA\.pl/ || /tsget\.pod/; # these commands are special cases
            err("$_ doesn't start with openssl-") unless /openssl-/;
        }
    }
}

checkstate();

if ( $opt_u || $opt_v) {
    printem('crypto');
    printem('ssl');
    checkmacros();
}

exit $status;