1#!/usr/bin/env perl 2#*************************************************************************** 3# _ _ ____ _ 4# Project ___| | | | _ \| | 5# / __| | | | |_) | | 6# | (__| |_| | _ <| |___ 7# \___|\___/|_| \_\_____| 8# 9# Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al. 10# 11# This software is licensed as described in the file COPYING, which 12# you should have received as part of this distribution. The terms 13# are also available at https://curl.se/docs/copyright.html. 14# 15# You may opt to use, copy, modify, merge, publish, distribute and/or sell 16# copies of the Software, and permit persons to whom the Software is 17# furnished to do so, under the terms of the COPYING file. 18# 19# This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY 20# KIND, either express or implied. 21# 22# SPDX-License-Identifier: curl 23# 24########################################################################### 25 26=begin comment 27 28Converts a curldown file to nroff (man page). 29 30=end comment 31=cut 32 33use strict; 34use warnings; 35 36my $cd2nroff = "0.1"; # to keep check 37my $dir; 38my $extension; 39my $keepfilename; 40 41while(@ARGV) { 42 if($ARGV[0] eq "-d") { 43 shift @ARGV; 44 $dir = shift @ARGV; 45 } 46 elsif($ARGV[0] eq "-e") { 47 shift @ARGV; 48 $extension = shift @ARGV; 49 } 50 elsif($ARGV[0] eq "-k") { 51 shift @ARGV; 52 $keepfilename = 1; 53 } 54 elsif($ARGV[0] eq "-h") { 55 print <<HELP 56Usage: cd2nroff [options] [file.md] 57 58-d <dir> Write the output to the file name from the meta-data in the 59 specified directory, instead of writing to stdout 60-e <ext> If -d is used, this option can provide an added "extension", arbitrary 61 text really, to append to the file name. 62-h This help text, 63-v Show version then exit 64HELP 65 ; 66 exit 0; 67 } 68 elsif($ARGV[0] eq "-v") { 69 print "cd2nroff version $cd2nroff\n"; 70 exit 0; 71 } 72 else { 73 last; 74 } 75} 76 77use POSIX qw(strftime); 78my @ts; 79if (defined($ENV{SOURCE_DATE_EPOCH})) { 80 @ts = gmtime($ENV{SOURCE_DATE_EPOCH}); 81} else { 82 @ts = localtime; 83} 84my $date = strftime "%Y-%m-%d", @ts; 85 86sub outseealso { 87 my (@sa) = @_; 88 my $comma = 0; 89 my @o; 90 push @o, ".SH SEE ALSO\n"; 91 for my $s (sort @sa) { 92 push @o, sprintf "%s.BR $s", $comma ? ",\n": ""; 93 $comma = 1; 94 } 95 push @o, "\n"; 96 return @o; 97} 98 99sub outprotocols { 100 my (@p) = @_; 101 my $comma = 0; 102 my @o; 103 push @o, ".SH PROTOCOLS\n"; 104 105 if($p[0] eq "TLS") { 106 push @o, "All TLS based protocols: HTTPS, FTPS, IMAPS, POP3S, SMTPS etc."; 107 } 108 else { 109 my @s = sort @p; 110 for my $e (sort @s) { 111 push @o, sprintf "%s$e", 112 $comma ? (($e eq $s[-1]) ? " and " : ", "): ""; 113 $comma = 1; 114 } 115 } 116 push @o, "\n"; 117 return @o; 118} 119 120sub outtls { 121 my (@t) = @_; 122 my $comma = 0; 123 my @o; 124 if($t[0] eq "All") { 125 push @o, "\nAll TLS backends support this option."; 126 } 127 else { 128 push @o, "\nThis option works only with the following TLS backends:\n"; 129 my @s = sort @t; 130 for my $e (@s) { 131 push @o, sprintf "%s$e", 132 $comma ? (($e eq $s[-1]) ? " and " : ", "): ""; 133 $comma = 1; 134 } 135 } 136 push @o, "\n"; 137 return @o; 138} 139 140my %knownprotos = ( 141 'DICT' => 1, 142 'FILE' => 1, 143 'FTP' => 1, 144 'FTPS' => 1, 145 'GOPHER' => 1, 146 'GOPHERS' => 1, 147 'HTTP' => 1, 148 'HTTPS' => 1, 149 'IMAP' => 1, 150 'IMAPS' => 1, 151 'LDAP' => 1, 152 'LDAPS' => 1, 153 'MQTT' => 1, 154 'POP3' => 1, 155 'POP3S' => 1, 156 'RTMP' => 1, 157 'RTMPS' => 1, 158 'RTSP' => 1, 159 'SCP' => 1, 160 'SFTP' => 1, 161 'SMB' => 1, 162 'SMBS' => 1, 163 'SMTP' => 1, 164 'SMTPS' => 1, 165 'TELNET' => 1, 166 'TFTP' => 1, 167 'WS' => 1, 168 'WSS' => 1, 169 'TLS' => 1, 170 'TCP' => 1, 171 'All' => 1 172 ); 173 174my %knowntls = ( 175 'BearSSL' => 1, 176 'GnuTLS' => 1, 177 'mbedTLS' => 1, 178 'OpenSSL' => 1, 179 'rustls' => 1, 180 'Schannel' => 1, 181 'Secure Transport' => 1, 182 'wolfSSL' => 1, 183 'All' => 1, 184 ); 185 186sub single { 187 my @seealso; 188 my @proto; 189 my @tls; 190 my $d; 191 my ($f)=@_; 192 my $copyright; 193 my $errors = 0; 194 my $fh; 195 my $line; 196 my $list; 197 my $tlslist; 198 my $section; 199 my $source; 200 my $spdx; 201 my $start = 0; 202 my $title; 203 204 if(defined($f)) { 205 if(!open($fh, "<:crlf", "$f")) { 206 print STDERR "cd2nroff failed to open '$f' for reading: $!\n"; 207 return 1; 208 } 209 } 210 else { 211 $f = "STDIN"; 212 $fh = \*STDIN; 213 binmode($fh, ":crlf"); 214 } 215 while(<$fh>) { 216 $line++; 217 if(!$start) { 218 if(/^---/) { 219 # header starts here 220 $start = 1; 221 } 222 next; 223 } 224 if(/^Title: *(.*)/i) { 225 $title=$1; 226 } 227 elsif(/^Section: *(.*)/i) { 228 $section=$1; 229 } 230 elsif(/^Source: *(.*)/i) { 231 $source=$1; 232 } 233 elsif(/^See-also: +(.*)/i) { 234 $list = 1; # 1 for see-also 235 push @seealso, $1; 236 } 237 elsif(/^See-also: */i) { 238 if($seealso[0]) { 239 print STDERR "$f:$line:1:ERROR: bad See-Also, needs list\n"; 240 return 2; 241 } 242 $list = 1; # 1 for see-also 243 } 244 elsif(/^Protocol:/i) { 245 $list = 2; # 2 for protocol 246 } 247 elsif(/^TLS-backend:/i) { 248 $list = 3; # 3 for TLS backend 249 } 250 elsif(/^ +- (.*)/i) { 251 # the only lists we support are see-also and protocol 252 if($list == 1) { 253 push @seealso, $1; 254 } 255 elsif($list == 2) { 256 push @proto, $1; 257 } 258 elsif($list == 3) { 259 push @tls, $1; 260 } 261 else { 262 print STDERR "$f:$line:1:ERROR: list item without owner?\n"; 263 return 2; 264 } 265 } 266 # REUSE-IgnoreStart 267 elsif(/^C: (.*)/i) { 268 $copyright=$1; 269 } 270 elsif(/^SPDX-License-Identifier: (.*)/i) { 271 $spdx=$1; 272 } 273 # REUSE-IgnoreEnd 274 elsif(/^---/) { 275 # end of the header section 276 if(!$title) { 277 print STDERR "ERROR: no 'Title:' in $f\n"; 278 return 1; 279 } 280 if(!$section) { 281 print STDERR "ERROR: no 'Section:' in $f\n"; 282 return 2; 283 } 284 if(!$seealso[0]) { 285 print STDERR "$f:$line:1:ERROR: no 'See-also:' present\n"; 286 return 2; 287 } 288 if(!$copyright) { 289 print STDERR "$f:$line:1:ERROR: no 'C:' field present\n"; 290 return 2; 291 } 292 if(!$spdx) { 293 print STDERR "$f:$line:1:ERROR: no 'SPDX-License-Identifier:' field present\n"; 294 return 2; 295 } 296 if($section == 3) { 297 if(!$proto[0]) { 298 printf STDERR "$f:$line:1:ERROR: missing Protocol:\n"; 299 exit 2; 300 } 301 my $tls = 0; 302 for my $p (@proto) { 303 if($p eq "TLS") { 304 $tls = 1; 305 } 306 if(!$knownprotos{$p}) { 307 printf STDERR "$f:$line:1:ERROR: invalid protocol used: $p:\n"; 308 exit 2; 309 } 310 } 311 # This is for TLS, require TLS-backend: 312 if($tls) { 313 if(!$tls[0]) { 314 printf STDERR "$f:$line:1:ERROR: missing TLS-backend:\n"; 315 exit 2; 316 } 317 for my $t (@tls) { 318 if(!$knowntls{$t}) { 319 printf STDERR "$f:$line:1:ERROR: invalid TLS backend: $t:\n"; 320 exit 2; 321 } 322 } 323 } 324 } 325 last; 326 } 327 else { 328 chomp; 329 print STDERR "WARN: unrecognized line in $f, ignoring:\n:'$_';" 330 } 331 } 332 333 if(!$start) { 334 print STDERR "$f:$line:1:ERROR: no header present\n"; 335 return 2; 336 } 337 338 my @desc; 339 my $quote = 0; 340 my $blankline = 0; 341 my $header = 0; 342 343 # cut off the leading path from the file name, if any 344 $f =~ s/^(.*[\\\/])//; 345 346 push @desc, ".\\\" generated by cd2nroff $cd2nroff from $f\n"; 347 push @desc, ".TH $title $section \"$date\" $source\n"; 348 while(<$fh>) { 349 $line++; 350 351 $d = $_; 352 353 if($quote) { 354 if($quote == 4) { 355 # remove the indentation 356 if($d =~ /^ (.*)/) { 357 push @desc, "$1\n"; 358 next; 359 } 360 else { 361 # end of quote 362 $quote = 0; 363 push @desc, ".fi\n"; 364 next; 365 } 366 } 367 if(/^~~~/) { 368 # end of quote 369 $quote = 0; 370 push @desc, ".fi\n"; 371 next; 372 } 373 # convert single backslahes to doubles 374 $d =~ s/\\/\\\\/g; 375 # lines starting with a period needs it escaped 376 $d =~ s/^\./\\&./; 377 push @desc, $d; 378 next; 379 } 380 381 # remove single line HTML comments 382 $d =~ s/<!--.*?-->//g; 383 384 # **bold** 385 $d =~ s/\*\*(\S.*?)\*\*/\\fB$1\\fP/g; 386 # *italics* 387 $d =~ s/\*(\S.*?)\*/\\fI$1\\fP/g; 388 389 if($d =~ /[^\\][\<\>]/) { 390 print STDERR "$f:$line:1:WARN: un-escaped < or > used\n"; 391 } 392 # convert backslash-'<' or '> to just the second character 393 $d =~ s/\\([<>])/$1/g; 394 395 # mentions of curl symbols with man pages use italics by default 396 $d =~ s/((lib|)curl([^ ]*\(3\)))/\\fI$1\\fP/gi; 397 398 # backticked becomes italics 399 $d =~ s/\`(.*?)\`/\\fI$1\\fP/g; 400 401 if(/^## (.*)/) { 402 my $word = $1; 403 # if there are enclosing quotes, remove them first 404 $word =~ s/[\"\'\`](.*)[\"\'\`]\z/$1/; 405 406 # enclose in double quotes if there is a space present 407 if($word =~ / /) { 408 push @desc, ".IP \"$word\"\n"; 409 } 410 else { 411 push @desc, ".IP $word\n"; 412 } 413 $header = 1; 414 } 415 elsif(/^# (.*)/) { 416 my $word = $1; 417 # if there are enclosing quotes, remove them first 418 $word =~ s/[\"\'](.*)[\"\']\z/$1/; 419 420 if($word eq "PROTOCOLS") { 421 print STDERR "$f:$line:1:WARN: PROTOCOLS section in source file\n"; 422 } 423 elsif($word eq "EXAMPLE") { 424 # insert the generated PROTOCOLS section before EXAMPLE 425 push @desc, outprotocols(@proto); 426 427 if($proto[0] eq "TLS") { 428 push @desc, outtls(@tls); 429 } 430 } 431 push @desc, ".SH $word\n"; 432 $header = 1; 433 } 434 elsif(/^~~~c/) { 435 # start of a code section, not indented 436 $quote = 1; 437 push @desc, "\n" if($blankline && !$header); 438 $header = 0; 439 push @desc, ".nf\n"; 440 } 441 elsif(/^~~~/) { 442 # start of a quote section; not code, not indented 443 $quote = 1; 444 push @desc, "\n" if($blankline && !$header); 445 $header = 0; 446 push @desc, ".nf\n"; 447 } 448 elsif(/^ (.*)/) { 449 # quoted, indented by 4 space 450 $quote = 4; 451 push @desc, "\n" if($blankline && !$header); 452 $header = 0; 453 push @desc, ".nf\n$1\n"; 454 } 455 elsif(/^[ \t]*\n/) { 456 # count and ignore blank lines 457 $blankline++; 458 } 459 else { 460 # don't output newlines if this is the first content after a 461 # header 462 push @desc, "\n" if($blankline && !$header); 463 $blankline = 0; 464 $header = 0; 465 466 # quote minuses in the output 467 $d =~ s/([^\\])-/$1\\-/g; 468 # replace single quotes 469 $d =~ s/\'/\\(aq/g; 470 # handle double quotes first on the line 471 $d =~ s/^(\s*)\"/$1\\&\"/; 472 473 # lines starting with a period needs it escaped 474 $d =~ s/^\./\\&./; 475 476 if($d =~ /^(.*) /) { 477 printf STDERR "$f:$line:%d:ERROR: 2 spaces detected\n", 478 length($1); 479 $errors++; 480 } 481 if($d =~ /^[ \t]*\n/) { 482 # replaced away all contents 483 $blankline= 1; 484 } 485 else { 486 push @desc, $d; 487 } 488 } 489 } 490 if($fh != \*STDIN) { 491 close($fh); 492 } 493 push @desc, outseealso(@seealso); 494 if($dir) { 495 if($keepfilename) { 496 $title = $f; 497 $title =~ s/\.[^.]*$//; 498 } 499 my $outfile = "$dir/$title.$section"; 500 if(defined($extension)) { 501 $outfile .= $extension; 502 } 503 if(!open(O, ">", $outfile)) { 504 print STDERR "Failed to open $outfile : $!\n"; 505 return 1; 506 } 507 print O @desc; 508 close(O); 509 } 510 else { 511 print @desc; 512 } 513 return $errors; 514} 515 516if(@ARGV) { 517 for my $f (@ARGV) { 518 my $r = single($f); 519 if($r) { 520 exit $r; 521 } 522 } 523} 524else { 525 exit single(); 526} 527