#!/usr/bin/env python
#
# Python application to scan CI build logs and extract useful information
#
# Logs can be in two formats:
# 1. Logs fetched using `gh run view --log nnn` have all jobs in a single file.
#    Each line is structured as "{job}\t{step}\t{datetime} {line}".
# 2. Logs captured manually via a web browser are for a single job.
#    Each line is structured as "{datetime} {line}".
#
# Format (1) is the most useful since it can be scripted and requires fewer steps.
# The two separate logs (regular and esp32 runs) can also be combined and processed as a single file.
#
# Use cases:
#
# - Generate a table of memory usage per sample
# - Given a second log file, compare runs and summarise the differences in memory usage
# - Pull out all warnings; there are many duplicates within a run and across runs,
#   so these are filtered to produce a definitive list
#
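# Example invocations (illustrative; see main() below for the full argument list):
#
#   gh run view --log nnn > ci.log        # or pass -f to fetch the latest runs
#   python scanlog.py ci.log              # memory usage tables, one per job
#   python scanlog.py ci.log -w -m        # merged, de-duplicated warnings
#   python scanlog.py new.log -c old.log  # compare memory usage between two runs
#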
import argparse
import os
import sys
import re
import subprocess
import json


class Table:
    COL_SEP = ' '

    def __init__(self):
        self.headings = []
        self.rows = []
        self.col_widths = []

    def append(self, row_data: dict):
        # Extend headings/widths with any columns not seen before
        for k in row_data.keys():
            if k in self.headings:
                continue
            self.headings.append(k)
            self.col_widths.append(len(k))
        row = ['' for x in self.headings]
        for k, v in row_data.items():
            i = self.headings.index(k)
            row[i] = v
            self.col_widths[i] = max(self.col_widths[i], len(v))
        self.rows.append(row)

    def _format_values(self, values: list):
        return self.COL_SEP.join(str(v).ljust(self.col_widths[i]) for i, v in enumerate(values))

    def format_headings(self):
        return self._format_values(self.headings)

    def format_separator(self):
        return self.COL_SEP.join(''.ljust(w, '-') for w in self.col_widths)

    def format_row(self, row: int | list):
        if isinstance(row, int):
            row = self.rows[row]
        return self._format_values(row)

    def find_row(self, name: str):
        return next((i for i, row in enumerate(self.rows) if row[0] == name), -1)

    def __iter__(self):
        return TableFormatter(self)


class TableFormatter:
    '''Iterator yielding headings, separator, then each row in turn'''

    def __init__(self, table: Table):
        self.table = table
        self.row_index = -2  # -2 => headings, -1 => separator, >= 0 => row index

    def __next__(self):
        table = self.table
        idx = self.row_index
        if idx == -2:
            self.row_index = -1
            return table.format_headings()
        if idx == -1:
            self.row_index = 0
            return table.format_separator()
        if idx < len(table.rows):
            self.row_index = idx + 1
            return table.format_row(idx)
        raise StopIteration
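

# Example of Table usage (illustrative):
#
#   t = Table()
#   t.append({'target': 'Basic_Blink', 'flash': '23456'})
#   t.append({'target': 'HttpServer', 'flash': '101234', 'ram': '32100'})
#   for line in t:  # headings, separator, then one formatted line per row
#       print(line)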


class Job:
    # The second figure in warning lines (the column number) is not reliable, so remove it
    WARNING_SPLIT = re.compile(r':(\d+): ?(\d+: )?(warning: )')
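    # Example (illustrative):
    #   WARNING_SPLIT.split('Lib/foo.h:42:10: warning: unused variable')
    # yields ['Lib/foo.h', '42', '10: ', 'warning: ', 'unused variable'], from which
    # parse_warning() takes the location 'Lib/foo.h:42' and the detail 'unused variable'.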

    def __init__(self, name: str):
        self.name = name
        self.table: Table = Table()
        self.warnings: dict[str, set[str]] = {}  # De-duplicated warnings, keyed by location
        self.warning_count: int = 0  # Includes duplicates

    def parse_warning(self, line: str):
        self.warning_count += 1
        s = line.removeprefix('from ')
        x = Job.WARNING_SPLIT.split(s)
        if len(x) == 5:
            location, warning = Path.normalise(f'{x[0]}:{x[1]}'), x[4]
        else:
            location, warning = '?', s
        lines = self.warnings.setdefault(location, set())
        lines.add(warning)


class Log:
    def __init__(self, name: str):
        self.name = os.path.splitext(name)[0]
        self.jobs: list[Job] = []


class Path:
    # Paths vary by platform, so normalise them
    IGNORE_PREFIX = [
        # Linux
        '/home/runner/projects/',
        '/home/runner/work/Sming/Sming/Sming/',
        '/opt/',
        # MacOS
        '/Users/runner/projects/',
        '/Users/runner/work/Sming/Sming/Sming/',
        # Windows
        '/d/a/Sming/Sming/projects/',
        '/d/a/Sming/Sming/Sming/',
        'd:/a/Sming/Sming/projects/',
        'd:/a/Sming/Sming/Sming/',
    ]
    REMOVE = re.compile('|'.join(f'^{s}' for s in IGNORE_PREFIX), re.IGNORECASE)
    SUBST = re.compile(r'^d:/opt/esp-idf-\d\.\d', re.IGNORECASE)

    @staticmethod
    def normalise(line: str) -> str:
        s = line.replace('\\', '/')
        s = Path.REMOVE.sub('', s)
        s = Path.SUBST.sub('esp-idf', s)
        return os.path.normpath(s)
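
# Example of Path.normalise (illustrative, on a POSIX host):
#   Path.normalise('d:/a/Sming/Sming/Sming/Core/Data/Stream.h') -> 'Core/Data/Stream.h'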


class Parser:
    JOB_LINE = re.compile(r'build \((.+?)\)\t(Build and test .+)\t(.+)')
    BUILD_LINE = re.compile(r'Building (.*)/out/.*/clib-App.*')
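
    # Per-job state machine: `self.state` holds the handler for the current phase,
    # cycling _searching -> _building -> _linking -> _searching as lines arrive.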

    def __init__(self):
        self.log = None
        self.job = None
        self.state = None
        self.target = None
        self._link_line = None
        self.row = None

    def scan(self, filename: str):
        sys.stderr.write(f'Scanning {filename}\n')
        self.log = Log(filename)
        with open(filename, 'rb') as logfile:
            for line_index, line in enumerate(logfile):
                try:
                    self.parse_line(line)
                except Exception as e:
                    e.add_note(f'Parsing line {line_index+1}')
                    raise
        sys.stderr.write('\r\033[K')
        self.log.jobs.sort(key=lambda job: job.name)
        return self.log
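
    # scan() above is the public entry point: it feeds each raw line to
    # parse_line(), which dispatches to the state handlers that follow.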
    def parse_line(self, line: bytes):
        line = line.decode('utf-8-sig').strip()
        # Replace typographical quotes with plain ones to ensure equivalence
        line = re.sub(r"‘|’", "'", line)
        job_name = None
        if not line[:4].isdigit():
            # Not a date: assume line is from a GH CLI logfile, look for a valid build line
            m = Parser.JOB_LINE.match(line)
            if not m:
                return
            job_name, _, line = m[1], m[2], m[3]
        if self.job and self.job.name != job_name:
            if self.row:
                self.job.table.append(self.row)
                self.row = None
            self.job = None
        if self.job is None:
            sys.stderr.write(f'\r{job_name} ...\033[K')
            self.job = Job(job_name)
            self.log.jobs.append(self.job)
            self.state = self._searching
        dtstr, _, line = line.partition(' ')
        if not dtstr:
            return
        if ': warning:' in line:
            self.job.parse_warning(line)
            return
        self.state(line)

    def _searching(self, line: str):
        '''Searching for `Building ... clib-App ...`, after which comes the memory usage summary'''
        match = Parser.BUILD_LINE.match(line)
        if match:
            self.target = Path.normalise(match[1])
            self._link_line = f'{os.path.basename(self.target)}: Linking'
            self.state = self._building

    def _building(self, line: str):
        if line.startswith(self._link_line):
            self.state = self._linking
            self.row = None
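
    # _linking (below) consumes the memory summary printed after the link step:
    # a '----' separator line, then either pipe-delimited rows (name in column 0,
    # value in column 4) or 'key : value' lines; the first line matching neither
    # form ends the table.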
    def _linking(self, line: str):
        if self.row is None:
            if line.startswith('----'):
                self.row = {'target': self.target}
            return
        if '|' in line:
            cols = line.split('|')
            k, v = cols[0], cols[4]
        elif ' : ' in line:
            k, v = line.split(':')
        else:
            self.job.table.append(self.row)
            self.row = self.target = None
            self.state = self._searching
            return
        k, v = k.strip(), v.strip()
        self.row[k] = v


def print_table(table: Table):
    for line in table:
        print(' ', line)
    print()


def merge_warnings(log: Log) -> dict[str, set]:
    '''Combine the de-duplicated warnings from all jobs into a single dict'''
    warnings = {}
    for job in log.jobs:
        for location, details in job.warnings.items():
            location_warnings = warnings.setdefault(location, set())
            location_warnings |= details
    return warnings
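

# Exclude files passed to print_warnings() contain one regular expression per line;
# a warning is dropped when any pattern matches '{location}\t{detail}' (case-insensitive),
# and a location counts as excluded only when all of its warnings match.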
def print_warnings(warnings: dict[str, set], exclude_file: str):
    exclude = None
    if exclude_file is not None:
        with open(exclude_file, 'r', encoding='utf-8') as f:
            s = '|'.join(line.strip() for line in f)
            exclude = re.compile(s, re.IGNORECASE)
    exclude_count = 0
    if exclude:
        unfiltered_warnings = warnings
        warnings = {}
        for location, details in unfiltered_warnings.items():
            filtered_details = {det for det in details if not exclude.match(f'{location}\t{det}')}
            if filtered_details:
                warnings[location] = filtered_details
            else:
                exclude_count += 1
    print(f'Listing {len(warnings)} locations, {exclude_count} excluded.')
    if not warnings:
        return
    loc_width = min(2 + max(len(loc) for loc in warnings), 80)
    loc_pad = ''.ljust(loc_width)
    for location in sorted(warnings, key=lambda s: s.lower()):
        if len(location) > loc_width:
            # Location too long to tabulate: print it on its own line
            print(f'\t{location}')
            locstr = loc_pad
        else:
            locstr = location.ljust(loc_width)
        for det in sorted(warnings[location]):
            print(f'\t{locstr}{det}')
            locstr = loc_pad
    print()


def fetch_logs(filename: str, repo: str = None, branch: str = None):
    if os.path.exists(filename):
        sys.stderr.write(f'{filename} exists, skipping download.\n')
        return

    def get_args(cmd: str):
        args = ['gh', 'run', cmd]
        if repo:
            args.append(f'-R={repo}')
        return args

    args = get_args('list')
    if branch:
        args.append(f'-b={branch}')
    args.append('--json=displayTitle,headBranch,number,name,databaseId,headSha,conclusion')
    r = subprocess.run(args, capture_output=True, encoding='utf-8', check=True)
    data = json.loads(r.stdout)
    # Collect the CI jobs for the most recent commit only
    joblist = []
    for job in data:
        if '(CI)' not in job['name']:
            continue
        if joblist and job['headSha'] != joblist[0]['headSha']:
            break
        joblist.append(job)
    with open(filename, 'w', encoding='utf-8') as f:
        sys.stderr.write(f'Creating {filename}...\n')
        for job in joblist:
            job_id = job['databaseId']
            sys.stderr.write(f'Fetching {job_id}: "{job["displayTitle"]}" - {job["headBranch"]} - {job["name"]} - {job["conclusion"]}\n')
            try:
                args = get_args('view') + ['--log', str(job_id)]
                r = subprocess.run(args, stdout=f, encoding='utf-8', check=True)
            except:
                # Don't leave a partial log file behind
                os.unlink(filename)
                raise
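

# The fetch above is equivalent to running, for example:
#   gh run list -R=<owner/repo> -b=<branch> --json=displayTitle,headBranch,number,name,databaseId,headSha,conclusion
#   gh run view -R=<owner/repo> --log <databaseId>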


def print_diff(log1: Log, log2: Log):
    for job1 in log1.jobs:
        try:
            job2 = next(job for job in log2.jobs if job.name == job1.name)
        except StopIteration:
            print(f'** job "{job1.name}" not found in "{log2.name}"')
            continue
        table1 = job1.table
        table2 = job2.table
        for row1 in table1.rows:
            target = row1[0]
            i = table2.find_row(target)
            if i < 0:
                print(f'** {target} NOT found in {log2.name} - {job2.name}')
                continue
            row2 = table2.rows.pop(i)
            if row2 == row1:
                continue
            diff_table = Table()
            data = {'log': log1.name}
            for k, v in zip(table1.headings[1:], row1[1:]):
                data[k] = v
            diff_table.append(data)
            data = {'log': log2.name}
            for k, v in zip(table2.headings[1:], row2[1:]):
                data[k] = v
            diff_table.append(data)
            data = {'log': 'Difference'}
            for name, v1, v2 in zip(table1.headings[1:], row1[1:], row2[1:]):
                if v1 == v2:
                    continue
                v1, v2 = int(v1, 0), int(v2, 0)
                data[name] = f'{v2-v1:+}'
            diff_table.append(data)
            print(f'{job1.name}: {target}')
            print_table(diff_table)
        # Matched rows were popped above, so anything left in table2 exists only in log2
        if table2.rows:
            print(f'** Targets not in {log1.name}')
            print_table(table2)


def main():
    parser = argparse.ArgumentParser(description='Sming CI log parser')
    parser.add_argument('filename', help='Log filename to read/write')
    parser.add_argument('-f', '--fetch', action='store_true', help='Fetch most recent CI runs from repo')
    parser.add_argument('-R', '--repo', help='Override default repo for fetch')
    parser.add_argument('-b', '--branch', help='Specify branch to fetch')
    parser.add_argument('-c', '--compare', help='Second log to compare')
    parser.add_argument('-w', '--warnings', action='store_true', help='Summarise warnings')
    parser.add_argument('-x', '--exclude', help='File containing source locations to exclude')
    parser.add_argument('-m', '--merge', action='store_true', help='Merge warnings from all jobs')
    args = parser.parse_args()

    if args.fetch:
        fetch_logs(args.filename, repo=args.repo, branch=args.branch)

    log1 = Parser().scan(args.filename)
    if args.compare is None:
        if args.warnings:
            if args.merge:
                print(f'Total warnings: {sum(job.warning_count for job in log1.jobs)} from {len(log1.jobs)} jobs.')
                warnings = merge_warnings(log1)
                print_warnings(warnings, args.exclude)
            else:
                for i, job in enumerate(log1.jobs):
                    print(f'Job #{i+1}: {job.name} - {job.warning_count} warnings')
                    print_warnings(job.warnings, args.exclude)
        else:
            for job in log1.jobs:
                print(job.name)
                print_table(job.table)
        return

    log2 = Parser().scan(args.compare)
    print_diff(log1, log2)


if __name__ == "__main__":
    main()