Add lint step to check module READMEs (#154)

* Add documentation check to lint process

* Split get_variables_outputs

* Install tfdoc dependencies in linting pipeline

* doc lint variant

* use new script args in ci build config

* exit early if doc does not exist

* align README docs with variables

Co-authored-by: Ludovico Magnocavallo <ludomagno@google.com>
This commit is contained in:
Julio Castillo
2020-11-07 09:12:41 +01:00
committed by GitHub
parent 3bb8ca5388
commit c31764fa7e
9 changed files with 149 additions and 14 deletions

14
tools/__init__.py Normal file
View File

@@ -0,0 +1,14 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

64
tools/check_boilerplate.py Executable file
View File

@@ -0,0 +1,64 @@
#!/usr/bin/env python3
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import glob
import os
import re
import sys
# Directory names that are pruned from the walk.
_EXCLUDE_DIRS = ('.git', '.terraform')
# A file containing this marker comment is exempt from the check.
_EXCLUDE_RE = re.compile(r'# skip boilerplate check')
# Exact file names and file extensions that are subject to the check.
_MATCH_FILES = ('Dockerfile', '.py', '.sh', '.tf', '.yaml', '.yml')
# Copyright line, a comment leader followed only by whitespace, then the
# first line of the Apache 2.0 notice. The comment leader is '#' or '*'.
_MATCH_STRING = ''.join((
    r'^\s*[#\*]\sCopyright [0-9]{4} Google LLC$\s+[#\*]\s+',
    r'[#\*]\sLicensed under the Apache License, Version 2.0 ',
    r'\(the "License"\);\s+',
))
# Multiline mode lets '^' and '$' anchor on individual lines of the file.
_MATCH_RE = re.compile(_MATCH_STRING, flags=re.MULTILINE)
def main(dir):
    """Cycle through files in dir and check for the Apache 2.0 boilerplate.

    Walks ``dir`` recursively, pruning the directories listed in
    ``_EXCLUDE_DIRS``, and inspects every file whose name or extension is
    listed in ``_MATCH_FILES``. Files containing the ``_EXCLUDE_RE`` marker
    are ignored. Files that cannot be read are reported as warnings, files
    that do not match ``_MATCH_RE`` are reported as errors.

    Args:
      dir: root directory to scan.

    Raises:
      SystemExit: with status 1 if any file is missing the boilerplate.
        Unreadable files alone do not cause a failure.
    """
    errors, warnings = [], []
    for root, dirs, files in os.walk(dir):
        # Prune in place so os.walk does not descend into excluded folders.
        dirs[:] = [d for d in dirs if d not in _EXCLUDE_DIRS]
        for fname in files:
            extension = os.path.splitext(fname)[1]
            if fname not in _MATCH_FILES and extension not in _MATCH_FILES:
                continue
            fpath = os.path.abspath(os.path.join(root, fname))
            # Reading is the step that can raise an I/O error, so it is the
            # one guarded here; the context manager also closes the handle.
            try:
                with open(fpath) as f:
                    content = f.read()
            except (IOError, OSError):
                warnings.append(fpath)
                continue
            if _EXCLUDE_RE.search(content):
                continue
            if not _MATCH_RE.search(content):
                errors.append(fpath)
    if warnings:
        print('The following files cannot be accessed:')
        print('\n'.join(' - {}'.format(s) for s in warnings))
    if errors:
        print('The following files are missing the license boilerplate:')
        print('\n'.join(' - {}'.format(s) for s in errors))
        sys.exit(1)
if __name__ == '__main__':
    # Exactly one positional argument is accepted: the directory to scan.
    if len(sys.argv) == 2:
        main(sys.argv[1])
    else:
        raise SystemExit('No directory passed.')

75
tools/check_documentation.py Executable file
View File

@@ -0,0 +1,75 @@
#!/usr/bin/env python3
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import enum
import pathlib
import sys
import click
import tfdoc
# Resolved parent of the directory that contains this script; the relative
# paths handled by check_path are joined onto it.
BASEDIR = pathlib.Path(__file__).resolve().parents[1]
class DocState(enum.Enum):
    """Result of checking one module's README documentation."""
    # README.md is not present in the module folder.
    MISSING = 1
    # The documentation check reported the README as up-to-date.
    OK = 2
    # The documentation check reported the README as out of date.
    STALE = 3
    # The documentation check could not reach a verdict.
    UNKNOWN = 4

    def __str__(self):
        "Return the one-character marker shown for this state in reports."
        # Every state needs its own non-empty marker, otherwise the
        # bracketed status printed in front of each module name is
        # indistinguishable between states.
        return {1: '✗', 2: '✓', 3: '!', 4: '?'}[self.value]
def check_path(pathname):
    """Yield the documentation state of every module folder in a path.

    Args:
      pathname: folder, joined onto ``BASEDIR``, whose immediate
        subfolders are checked.

    Yields:
      ``(DocState, name)`` tuples in sorted folder order, where ``name`` is
      the folder name. The state is ``MISSING`` when the folder has no
      ``README.md``, ``STALE`` when ``tfdoc.check_state`` returns ``False``,
      ``OK`` when it returns a true value and ``UNKNOWN`` otherwise.
    """
    path = BASEDIR / pathname
    for subpath in sorted(path.iterdir()):
        # Plain files and folders starting with an underscore are ignored.
        if not subpath.is_dir() or subpath.name.startswith('_'):
            continue
        # Report the full folder name: ``stem`` would drop whatever follows
        # the last dot (e.g. 'net.v2' would be shown as 'net').
        name = subpath.name
        if not (subpath / 'README.md').exists():
            yield DocState.MISSING, name
            continue
        state = tfdoc.check_state(subpath)
        if state is False:
            yield DocState.STALE, name
        elif state:
            yield DocState.OK, name
        else:
            # check_state gave neither False nor a true value.
            yield DocState.UNKNOWN, name
@click.command()
@click.argument('paths', type=str, nargs=-1)
def main(paths):
    "Cycle through modules and ensure READMEs are up-to-date."
    # States that make the whole run fail; UNKNOWN is reported but tolerated.
    failing = (DocState.MISSING, DocState.STALE)
    found_errors = False
    for path in paths:
        print(f'checking {path}')
        for state, name in check_path(path):
            found_errors = found_errors or state in failing
            print(f' [{state}] {name}')
    if not found_errors:
        return
    print('errors were present')
    sys.exit(1)
# Invoke the click command only when this file is executed as a script.
if __name__ == '__main__':
    main()

View File

@@ -179,7 +179,7 @@ def format_variables(variables, required_first=True):
'| {required} | {default} |'
)
for v in variables:
default = default_spec = type_spec = ''
default = type_spec = ''
if not v.required:
default = '<code title="{title}">{default}</code>'
if '\n' in v.default:
@@ -236,22 +236,49 @@ def replace_doc(module, doc):
raise SystemExit('Error replacing in README: %s' % e)
def get_variables(path):
    """Collect the variables declared by the module found in a path.

    Every file in ``path`` matching ``variables*tf`` is read and handed to
    ``parse_items``; the parsed items from all files are returned as a
    single list.
    """
    pattern = os.path.join(path, 'variables*tf')
    variables = []
    for filename in glob.glob(pattern):
        with open(filename) as tf_file:
            source = tf_file.read()
        variables.extend(parse_items(
            source, RE_VARIABLES, VariableToken, Variable, VariableData))
    return variables
def get_outputs(path):
    """Collect the outputs declared by the module found in a path.

    Every file in ``path`` matching ``outputs*tf`` is read and handed to
    ``parse_items``; the parsed items from all files are returned as a
    single list.
    """
    pattern = os.path.join(path, 'outputs*tf')
    outputs = []
    for filename in glob.glob(pattern):
        with open(filename) as tf_file:
            source = tf_file.read()
        outputs.extend(parse_items(
            source, RE_OUTPUTS, OutputToken, Output, OutputData))
    return outputs
def check_state(path):
    """Determine if a module's README has all its variables and outputs
    documentation up-to-date.

    Args:
      path: folder holding the module's Terraform files and ``README.md``.

    Returns:
      ``True`` if the documentation generated from the module's variables
      and outputs appears verbatim in the README, ``False`` if it does not,
      and ``None`` if the files cannot be read or the README does not
      contain the ``MARK_BEGIN`` / ``MARK_END`` markers.
    """
    try:
        variables = get_variables(path)
        outputs = get_outputs(path)
        # The context manager closes the README once its text is loaded.
        with open(os.path.join(path, 'README.md')) as readme_file:
            readme = readme_file.read()
    except (IOError, OSError):
        return None
    # Without both markers there is no generated section to compare with.
    if not re.search('(?sm)%s.*%s' % (MARK_BEGIN, MARK_END), readme):
        return None
    return get_doc(variables, outputs) in readme
@click.command()
@click.argument('module', type=click.Path(exists=True))
@click.option('--replace/--no-replace', default=True)
def main(module=None, replace=True):
"Program entry point."
try:
variables = []
for path in glob.glob(os.path.join(module, 'variables*tf')):
with open(path) as file:
variables += [v for v in parse_items(
file.read(), RE_VARIABLES, VariableToken, Variable, VariableData)]
outputs = []
for path in glob.glob(os.path.join(module, 'outputs*tf')):
with open(path) as file:
outputs += [o for o in parse_items(
file.read(), RE_OUTPUTS, OutputToken, Output, OutputData)]
variables = get_variables(module)
outputs = get_outputs(module)
except (IOError, OSError) as e:
raise SystemExit(e)
doc = get_doc(variables, outputs)