xref: /php-src/scripts/dev/tidy.php (revision 187f6857)
1#!/usr/bin/env php
2<?php
3
// Turn every error reported to the handler into an exception carrying the
// error message, so the script stops instead of continuing past a failure.
set_error_handler(function($severity, $message) {
    throw new Exception($message);
});

// Directory to process: the first command-line argument when one is given,
// otherwise the directory two levels above the one containing this script.
$dir = $argc > 1 ? $argv[1] : __DIR__ . '/../..';

if (!is_dir($dir)) {
    echo "Directory $dir does not exist.\n";
    exit(1);
}
17
// Recursive walk over $dir that yields leaf entries only.
$it = new RecursiveIteratorIterator(new RecursiveDirectoryIterator($dir), RecursiveIteratorIterator::LEAVES_ONLY);

// Path fragments to leave untouched. A file is skipped when its path contains
// any one of these strings.
$excludes = [
    // --- Bundled libraries / files ---
    'ext/date/lib/',
    'ext/dom/lexbor/',
    'ext/fileinfo/data_file.c',
    'ext/fileinfo/libmagic/',
    'ext/gd/libgd/',
    'ext/hash/sha3/',
    'ext/hash/hash_whirlpool.c',
    'ext/hash/php_hash_whirlpool_tables.h',
    'ext/mbstring/libmbfl/',
    'ext/mbstring/unicode_data.h',
    'ext/opcache/jit/ir',
    'ext/pcre/pcre2lib/',
    'ext/standard/html_tables/html_table_gen.php',
    'sapi/cli/php_http_parser.c',
    'sapi/cli/php_http_parser.h',
    'sapi/litespeed/',

    // --- Has a PHP-like extension but is not a PHP file ---
    'ext/zlib/tests/data.inc',

    // --- Flexible HEREDOC/NOWDOC tests are likely whitespace sensitive ---
    // TODO: Properly classify them.
    'Zend/tests/flexible-',
];
47
// Visit every regular file under $dir, tidy the ones whose extension maps to a
// known language, and write a file back only when its content actually changed.
foreach ($it as $file) {
    if (!$file->isFile()) {
        continue;
    }

    $path = $file->getPathName();

    // The exclude fragments are written with forward slashes, while the
    // iterator builds paths with the platform separator. Normalise a copy of
    // the path for matching so the excludes also take effect where the
    // separator is a backslash; $path itself is kept as-is for file I/O.
    $matchPath = str_replace(DIRECTORY_SEPARATOR, '/', $path);
    foreach ($excludes as $exclude) {
        if (strpos($matchPath, $exclude) !== false) {
            continue 2;
        }
    }

    // Files with an unrecognised extension are left alone.
    $lang = getLanguageFromExtension($file->getExtension());
    if ($lang === null) {
        continue;
    }

    $origCode = $code = file_get_contents($path);

    if ($lang === 'c') {
        $code = stripTrailingWhitespace($code);
        // TODO: Avoid this for now.
        // $code = reindentToTabs($code);
    } else if ($lang === 'php') {
        $code = stripTrailingWhitespace($code);
        $code = reindentToSpaces($code);
    } else if ($lang === 'phpt') {
        // Only the code-carrying sections of a test are tidied; see
        // transformTestCode() for which sections those are.
        $code = transformTestCode($code, function(string $code) {
            $code = stripTrailingWhitespace($code);
            $code = reindentToSpaces($code);
            return $code;
        });
    }

    // Avoid touching files (and their mtime) when nothing changed.
    if ($origCode !== $code) {
        file_put_contents($path, $code);
    }
}
86
/**
 * Remove spaces and tabs found at the end of each line of $code.
 *
 * The character class is deliberately [ \t] rather than \h: the pattern runs
 * without the /u modifier, and in that mode \h also matches the single byte
 * 0xA0. That byte is the last byte of many UTF-8 sequences (for example
 * "\xC3\xA0"), so \h+$ would chop a multi-byte character at the end of a line
 * in half and corrupt the file.
 *
 * @param string $code Text to clean up.
 * @return string The text with trailing spaces/tabs removed from every line.
 * @throws RuntimeException If the regular expression engine reports a failure.
 */
function stripTrailingWhitespace(string $code): string {
    $result = preg_replace('/[ \t]+$/m', '', $code);
    if ($result === null) {
        // preg_replace() signals engine errors (e.g. backtrack limit) with null.
        throw new RuntimeException(
            'stripTrailingWhitespace: preg_replace() failed with error code ' . preg_last_error()
        );
    }
    return $result;
}
90
/**
 * Convert the run of spaces at the start of each line into tabs.
 *
 * Every full group of four spaces becomes one tab; any remainder is kept as
 * spaces after the tabs. Lines that do not start with a space are not touched.
 *
 * @param string $code Text to reindent.
 * @return string The reindented text.
 */
function reindentToTabs(string $code): string {
    $tabSize = 4;
    $convert = function(array $matches) use ($tabSize) {
        $width = strlen($matches[0]);
        $tabs = str_repeat("\t", intdiv($width, $tabSize));
        $rest = str_repeat(" ", $width % $tabSize);
        return $tabs . $rest;
    };
    return preg_replace_callback('/^ +/m', $convert, $code);
}
100
/**
 * Replace the leading spaces/tabs of each line with spaces only.
 *
 * The leading whitespace is measured in columns: a space is one column wide
 * and a tab advances to the next multiple of four. The line then gets that
 * many spaces as its indentation.
 *
 * @param string $code Text to reindent.
 * @return string The reindented text.
 */
function reindentToSpaces(string $code): string {
    $tabSize = 4;
    $expand = function(array $matches) use ($tabSize) {
        $leading = $matches[0];
        $column = 0;
        for ($i = 0, $length = strlen($leading); $i < $length; $i++) {
            if ($leading[$i] === ' ') {
                $column += 1;
                continue;
            }
            // Anything else matched by the pattern is a tab: jump to the next
            // tab stop (a full $tabSize when already sitting on one).
            $column += $tabSize - ($column % $tabSize);
        }
        return str_repeat(" ", $column);
    };
    return preg_replace_callback('/^[ \t]+/m', $expand, $code);
}
120
/**
 * Apply $transformer to the code-carrying sections of a .phpt test.
 *
 * The sections handled are --FILE--, --SKIPIF-- and --CLEAN--. The text between
 * such a marker and the next section marker (or the end of the file, so that a
 * trailing section is processed too) is passed to $transformer and replaced
 * with its return value. Everything else is returned unchanged.
 *
 * Markers are only recognised at the start of a line, so a marker-like string
 * inside the test code (e.g. echo '--DONE--';) neither starts a section nor
 * cuts one short.
 *
 * Tests containing the string --WHITESPACE_SENSITIVE-- are returned untouched.
 *
 * @param string   $code        Full contents of the test file.
 * @param callable $transformer Receives a section body (string), returns the new body.
 * @return string The test contents with the selected sections transformed.
 * @throws RuntimeException If the regular expression engine reports a failure.
 */
function transformTestCode(string $code, callable $transformer): string {
    // Don't transform whitespace-sensitive tests.
    if (strpos($code, '--WHITESPACE_SENSITIVE--') !== false) {
        return $code;
    }

    $result = preg_replace_callback(
        // m: ^ anchors markers to line starts; s: the lazy .+? may span lines.
        '/^(--(?:FILE|SKIPIF|CLEAN)--)(.+?)(?=^--[A-Z_]+--|\z)/ms',
        function(array $matches) use($transformer) {
            return $matches[1] . $transformer($matches[2]);
        },
        $code
    );
    if ($result === null) {
        // preg_replace_callback() signals engine errors with null.
        throw new RuntimeException(
            'transformTestCode: preg_replace_callback() failed with error code ' . preg_last_error()
        );
    }
    return $result;
}
135
/**
 * Map a file extension (without the dot) to the language class used to pick
 * the tidying rules.
 *
 * @param string $ext File extension, compared case-sensitively.
 * @return string|null 'c', 'php' or 'phpt'; null for any other extension.
 */
function getLanguageFromExtension(string $ext): ?string {
    static $languageByExtension = [
        // C sources and headers, plus the grammar/lexer inputs handled like C.
        'c'    => 'c',
        'h'    => 'c',
        'cpp'  => 'c',
        'y'    => 'c',
        'l'    => 'c',
        're'   => 'c',
        // PHP scripts and include files.
        'php'  => 'php',
        'inc'  => 'php',
        // PHP test files.
        'phpt' => 'phpt',
    ];

    return $languageByExtension[$ext] ?? null;
}
154