From ee296efe085431e866907beaf77d4b06b69dd4ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Barnab=C3=A1s=20Domozi?= Date: Thu, 5 Mar 2026 18:14:35 +0100 Subject: [PATCH] Include skiplist file in the analysis info This patch introduces a new table called AnalysisInfoFile, which is intended to store files related to analysis information. The actual file contents are stored separately in the FileContent table. During CodeChecker analyze, we create a new directory called conf/ in the report directory. The entire conf/ directory will be added to the massStoreRun ZIP file. With this PR, only the skipfile is copied to the conf directory, but this could be extended later. --- analyzer/codechecker_analyzer/cli/analyze.py | 29 ++++++ web/client/codechecker_client/cli/store.py | 7 ++ .../codechecker_server/api/mass_store_run.py | 91 ++++++++++++------- .../codechecker_server/api/report_server.py | 16 +++- .../codechecker_server/database/db_cleanup.py | 8 +- .../database/run_db_model.py | 29 ++++++ web/server/codechecker_server/metadata.py | 5 + ...5047b6513_add_analysis_info_files_table.py | 51 +++++++++++ .../store/test_proj/divide_zero/skipfile | 1 + web/tests/functional/store/test_store.py | 19 ++++ 10 files changed, 218 insertions(+), 38 deletions(-) create mode 100644 web/server/codechecker_server/migrations/report/versions/29e5047b6513_add_analysis_info_files_table.py create mode 100644 web/tests/functional/store/test_proj/divide_zero/skipfile diff --git a/analyzer/codechecker_analyzer/cli/analyze.py b/analyzer/codechecker_analyzer/cli/analyze.py index 7daae8ffad..8ee5474165 100644 --- a/analyzer/codechecker_analyzer/cli/analyze.py +++ b/analyzer/codechecker_analyzer/cli/analyze.py @@ -1094,6 +1094,34 @@ def __update_review_status_config(args): os.symlink(args.review_status_config, rs_config_to_send) +def __update_analysis_config_files(args): + """ + Copy analysis related configuration files (e.g. skipfile) + to report_dir/conf/. 
+ This directory will be included in the ZIP file, + which will be stored on the server. + """ + conf_dir = os.path.join(args.output_path, "conf") + + # Remove any config files used during previous analysis + if os.path.isdir(conf_dir): + shutil.rmtree(conf_dir) + + # Create a new conf directory + os.makedirs(conf_dir) + + def add_file_to_conf_dir(file_path: str): + if not os.path.isfile(file_path): + return + + file_path = os.path.abspath(file_path) + filename = os.path.basename(file_path) + shutil.copyfile(file_path, os.path.join(conf_dir, filename)) + + if 'skipfile' in args: + add_file_to_conf_dir(args.skipfile) + + def __cleanup_metadata(metadata_prev, metadata): """ Cleanup metadata. @@ -1455,6 +1483,7 @@ def main(args): __update_skip_file(args) __update_review_status_config(args) + __update_analysis_config_files(args) LOG.debug("Cleanup metadata file started.") __cleanup_metadata(metadata_prev, metadata) diff --git a/web/client/codechecker_client/cli/store.py b/web/client/codechecker_client/cli/store.py index afe025b2c3..c835b96d36 100644 --- a/web/client/codechecker_client/cli/store.py +++ b/web/client/codechecker_client/cli/store.py @@ -505,6 +505,13 @@ def assemble_zip(inputs, files_to_compress[os.path.dirname(review_status_file_path)]\ .add(review_status_file_path) + # Add files from report_dir/conf/ directory + conf_dir = os.path.join(dir_path, "conf") + if os.path.isdir(conf_dir): + for file in os.listdir(os.fsencode(conf_dir)): + conf_file = os.path.join(conf_dir, os.fsdecode(file)) + files_to_compress[conf_dir].add(conf_file) + LOG.debug(f"Processing {len(analyzer_result_file_paths)} report files ...") analyzer_result_file_reports = parse_analyzer_result_files( diff --git a/web/server/codechecker_server/api/mass_store_run.py b/web/server/codechecker_server/api/mass_store_run.py index 64a319991d..e806e198dd 100644 --- a/web/server/codechecker_server/api/mass_store_run.py +++ b/web/server/codechecker_server/api/mass_store_run.py @@ -15,11 +15,13 @@ 
from collections import defaultdict from datetime import datetime, timedelta import fnmatch +import hashlib from hashlib import sha256 import json import os from pathlib import Path import sqlalchemy +from sqlalchemy.orm import Session as SA_Session import tempfile import time from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union, \ @@ -46,7 +48,7 @@ from ..database.config_db_model import Product from ..database.database import DBSession from ..database.run_db_model import \ - AnalysisInfo, AnalysisInfoChecker, AnalyzerStatistic, \ + AnalysisInfo, AnalysisInfoChecker, AnalysisInfoFile, AnalyzerStatistic, \ BugPathEvent, BugReportPoint, \ Checker, \ ExtendedReportData, \ @@ -814,8 +816,8 @@ def __add_file_content( self, session: DBSession, source_file_name: str, - content_hash: Optional[str] - ): + content_hash: Optional[str] = None + ) -> str: """ Add the necessary file contents. If content_hash in None then this function calculates the content hash. Or if it's available at the @@ -871,6 +873,8 @@ def __add_file_content( # the meantime. session.rollback() + return content_hash + def __store_checker_identifiers(self, checkers: Set[Tuple[str, str]]): """ Stores the identifiers "(analyzer, checker_name)" in the database into @@ -1000,6 +1004,32 @@ def __store_analysis_statistics( session.add(analyzer_statistics) + def __store_analysis_info_files( + self, + session: SA_Session, + analysis_info_id: int, + report_dir_path: str + ): + """ Store analyzer related config files (e.g. 
skipfile) """ + conf_dir_path = os.path.join(report_dir_path, "conf") + zip_conf_dir = os.path.join( + self._zip_dir, "reports", + hashlib.md5(conf_dir_path.encode('utf-8')).hexdigest()) + + if not os.path.isdir(zip_conf_dir): + return + + for file in os.listdir(os.fsencode(zip_conf_dir)): + conf_file = os.path.join(zip_conf_dir, os.fsdecode(file)) + content_hash = self.__add_file_content(session, conf_file) + + if (not session.get(AnalysisInfoFile, + (analysis_info_id, content_hash))): + session.add(AnalysisInfoFile( + analysis_info_id=analysis_info_id, + filename=os.path.basename(conf_file), + content_hash=content_hash)) + def __store_analysis_info( self, session: DBSession, @@ -1012,37 +1042,30 @@ def __store_analysis_info( analyzer_command.encode("utf-8"), zlib.Z_BEST_COMPRESSION) - analysis_info_rows = session \ - .query(AnalysisInfo) \ - .filter(AnalysisInfo.analyzer_command == cmd) \ - .all() - - if analysis_info_rows: - # It is possible when multiple runs are stored - # simultaneously to the server with the same analysis - # command that multiple entries are stored into the - # database. In this case we will select the first one. - analysis_info = analysis_info_rows[0] - else: - analysis_info = AnalysisInfo(analyzer_command=cmd) - - # Obtain the ID eagerly to be able to use the M-to-N table. - session.add(analysis_info) - session.flush() - session.refresh(analysis_info, ["id"]) - - for analyzer in mip.analyzers: - q = session \ - .query(Checker) \ - .filter(Checker.analyzer_name == analyzer) - db_checkers = {r.checker_name: r for r in q.all()} - - connection_rows = [AnalysisInfoChecker( - analysis_info, db_checkers[chk], is_enabled) - for chk, is_enabled - in mip.checkers.get(analyzer, {}).items()] - for r in connection_rows: - session.add(r) + analysis_info = AnalysisInfo(analyzer_command=cmd) + + # Obtain the ID eagerly to be able to use the M-to-N table. 
+        session.add(analysis_info)
+        session.flush()
+        session.refresh(analysis_info, ["id"])
+
+        for analyzer in mip.analyzers:
+            q = session \
+                .query(Checker) \
+                .filter(Checker.analyzer_name == analyzer)
+            db_checkers = {r.checker_name: r for r in q.all()}
+
+            connection_rows = [AnalysisInfoChecker(
+                analysis_info, db_checkers[chk], is_enabled)
+                for chk, is_enabled
+                in mip.checkers.get(analyzer, {}).items()]
+            for r in connection_rows:
+                session.add(r)
+
+        if mip.report_dir_path:
+            self.__store_analysis_info_files(session,
+                                             analysis_info.id,
+                                             mip.report_dir_path)
 
         run_history.analysis_info.append(analysis_info)
         self.__analysis_info[src_dir_path] = analysis_info
diff --git a/web/server/codechecker_server/api/report_server.py b/web/server/codechecker_server/api/report_server.py
index 373f9f09fa..3dda35053f 100644
--- a/web/server/codechecker_server/api/report_server.py
+++ b/web/server/codechecker_server/api/report_server.py
@@ -63,7 +63,8 @@
 from ..database.config_db_model import Product
 from ..database.database import conv, DBSession, escape_like
 from ..database.run_db_model import \
-    AnalysisInfo, AnalysisInfoChecker as DB_AnalysisInfoChecker, \
+    AnalysisInfo, \
+    AnalysisInfoChecker as DB_AnalysisInfoChecker, AnalysisInfoFile, \
     AnalyzerStatistic, \
     BugPathEvent, BugReportPoint, \
     CleanupPlan, CleanupPlanReportHash, Checker, Comment, \
@@ -1723,6 +1724,19 @@ def getAnalysisInfo(self, analysis_info_filter, limit, offset):
             checkers[analyzer][checker] = API_AnalysisInfoChecker(
                 enabled=enabled)
 
+            analysis_config_files = session \
+                .query(AnalysisInfoFile.filename,
+                       FileContent.content) \
+                .join(FileContent, AnalysisInfoFile.content_hash
+                      == FileContent.content_hash) \
+                .filter(AnalysisInfoFile.analysis_info_id
+                        == cmd.id).all()
+
+            # Append analysis files to the command string
+            for filename, content in analysis_config_files:
+                command += f"\n\n{filename}:\n"
+                command += zlib.decompress(content).decode("utf-8")
+
             res.append(ttypes.AnalysisInfo(
analyzerCommand=html.escape(command), checkers=checkers)) diff --git a/web/server/codechecker_server/database/db_cleanup.py b/web/server/codechecker_server/database/db_cleanup.py index 638e9c9a3b..9569e414c5 100644 --- a/web/server/codechecker_server/database/db_cleanup.py +++ b/web/server/codechecker_server/database/db_cleanup.py @@ -13,6 +13,7 @@ from typing import Dict import sqlalchemy +from sqlalchemy import union from codechecker_api.codeCheckerDBAccess_v6.ttypes import Severity @@ -21,7 +22,7 @@ from .database import DBSession from .run_db_model import \ - AnalysisInfo, \ + AnalysisInfo, AnalysisInfoFile, \ BugPathEvent, BugReportPoint, \ Comment, Checker, \ File, FileContent, \ @@ -108,8 +109,9 @@ def remove_unused_files(product): if total_count: LOG.debug("%d dangling files deleted.", total_count) - files = session.query(File.content_hash) \ - .group_by(File.content_hash) + files = union( + session.query(File.content_hash), + session.query(AnalysisInfoFile.content_hash)) session.query(FileContent) \ .filter(FileContent.content_hash.notin_(files)) \ diff --git a/web/server/codechecker_server/database/run_db_model.py b/web/server/codechecker_server/database/run_db_model.py index 4d346c9513..6884a00a14 100644 --- a/web/server/codechecker_server/database/run_db_model.py +++ b/web/server/codechecker_server/database/run_db_model.py @@ -79,12 +79,41 @@ def __init__(self, self.enabled = is_enabled +class AnalysisInfoFile(Base): + __tablename__ = "analysis_info_files" + + analysis_info_id = Column(Integer, + ForeignKey("analysis_info.id", + deferrable=True, + initially="DEFERRED", + ondelete="CASCADE"), + primary_key=True) + + filename = Column(String, nullable=False) + + content_hash = Column(String, + ForeignKey("file_contents.content_hash", + deferrable=True, + initially="DEFERRED", + ondelete="CASCADE"), + primary_key=True) + + def __init__(self, + analysis_info_id: int, + filename: str, + content_hash: str): + self.analysis_info_id = analysis_info_id + 
self.filename = filename + self.content_hash = content_hash + + class AnalysisInfo(Base): __tablename__ = "analysis_info" id = Column(Integer, autoincrement=True, primary_key=True) analyzer_command = Column(LargeBinary) available_checkers = relationship(AnalysisInfoChecker, uselist=True) + analyzer_files = relationship(AnalysisInfoFile, uselist=True) def __init__(self, analyzer_command: bytes): self.analyzer_command = analyzer_command diff --git a/web/server/codechecker_server/metadata.py b/web/server/codechecker_server/metadata.py index 02128f2805..c78e31b193 100644 --- a/web/server/codechecker_server/metadata.py +++ b/web/server/codechecker_server/metadata.py @@ -66,6 +66,8 @@ def __init__(self, metadata_file_path): self.disabled_checkers: DisabledCheckers = set() self.checker_to_analyzer: CheckerToAnalyzer = {} + self.report_dir_path = None + self.__metadata_dict: Dict[str, Any] = {} if os.path.isfile(metadata_file_path): self.__metadata_dict = cast(Dict[str, Any], @@ -184,6 +186,9 @@ def __process_metadata_info_v2(self): if tool['name'] == 'codechecker' and 'version' in tool: cc_versions.add(tool['version']) + if tool['name'] == 'codechecker': + self.report_dir_path = tool.get('output_path') + if 'command' in tool: check_commands.add(' '.join(tool['command'])) diff --git a/web/server/codechecker_server/migrations/report/versions/29e5047b6513_add_analysis_info_files_table.py b/web/server/codechecker_server/migrations/report/versions/29e5047b6513_add_analysis_info_files_table.py new file mode 100644 index 0000000000..96f8b65e4e --- /dev/null +++ b/web/server/codechecker_server/migrations/report/versions/29e5047b6513_add_analysis_info_files_table.py @@ -0,0 +1,51 @@ +""" +Add analysis_info_files table + +Revision ID: 29e5047b6513 +Revises: 198654dac219 +Create Date: 2026-03-05 17:35:36.286847 +""" + +from logging import getLogger + +from alembic import op +import sqlalchemy as sa + + +# Revision identifiers, used by Alembic. 
+revision = '29e5047b6513' +down_revision = '198654dac219' +branch_labels = None +depends_on = None + + +def upgrade(): + LOG = getLogger("migration/report") + # ### commands auto generated by Alembic - please adjust! ### + op.create_table( + 'analysis_info_files', + sa.Column('analysis_info_id', sa.Integer(), nullable=False), + sa.Column('filename', sa.String(), nullable=False), + sa.Column('content_hash', sa.String(), nullable=False), + sa.ForeignKeyConstraint( + ['analysis_info_id'], ['analysis_info.id'], + name=op.f( + 'fk_analysis_info_files_analysis_info_id_analysis_info'), + ondelete='CASCADE', initially='DEFERRED', deferrable=True), + sa.ForeignKeyConstraint( + ['content_hash'], ['file_contents.content_hash'], + name=op.f( + 'fk_analysis_info_files_content_hash_file_contents'), + ondelete='CASCADE', initially='DEFERRED', deferrable=True), + sa.PrimaryKeyConstraint( + 'analysis_info_id', 'content_hash', + name=op.f('pk_analysis_info_files')) + ) + # ### end Alembic commands ### + + +def downgrade(): + LOG = getLogger("migration/report") + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_table('analysis_info_files') + # ### end Alembic commands ### diff --git a/web/tests/functional/store/test_proj/divide_zero/skipfile b/web/tests/functional/store/test_proj/divide_zero/skipfile new file mode 100644 index 0000000000..4bf46ff0fd --- /dev/null +++ b/web/tests/functional/store/test_proj/divide_zero/skipfile @@ -0,0 +1 @@ +-*.txt diff --git a/web/tests/functional/store/test_store.py b/web/tests/functional/store/test_store.py index 18d7e42d0d..9718f1a3b0 100644 --- a/web/tests/functional/store/test_store.py +++ b/web/tests/functional/store/test_store.py @@ -264,6 +264,8 @@ def test_store_multiple_report_dirs(self): cfg['reportdir'] = report_dir1 cfg['checkers'] = [ '-d', 'core.DivideZero', '-e', 'deadcode.DeadStores'] + cfg['skip_file'] = os.path.join(self._divide_zero_workspace, + 'skipfile') codechecker.analyze(cfg, self._divide_zero_workspace) with open(os.path.join(report_dir1, 'metadata.json'), 'r+', @@ -278,6 +280,7 @@ def test_store_multiple_report_dirs(self): cfg['reportdir'] = report_dir2 cfg['checkers'] = [ '-e', 'core.DivideZero', '-d', 'deadcode.DeadStores'] + cfg.pop('skip_file') codechecker.analyze(cfg, self._divide_zero_workspace) def store_multiple_report_dirs(report_dirs): @@ -323,12 +326,28 @@ def store_multiple_report_dirs(report_dirs): analysis_info_filter = AnalysisInfoFilter(runId=report['runId']) analysis_info = self._cc_client.getAnalysisInfo( analysis_info_filter, limit, offset) + self.assertEqual(len(analysis_info), 2) self.assertTrue( any(report_dir1 in i.analyzerCommand for i in analysis_info)) self.assertTrue( any(report_dir2 in i.analyzerCommand for i in analysis_info)) + # Skip file content + skip_file_info = "skipfile:\n-*.txt" + + # During the analysis of report_dir1, we used a skipfile, + # and the skipfile content should appear in the analyzer command. 
+ self.assertTrue(all(skip_file_info in + i.analyzerCommand for i in analysis_info + if report_dir1 in i.analyzerCommand)) + + # No skipfile was used during the analysis of report_dir2, + # so we shouldn't see skipfile content in this case. + self.assertFalse(any(skip_file_info in + i.analyzerCommand for i in analysis_info + if report_dir2 in i.analyzerCommand)) + self.assertTrue(all( '<' not in i.analyzerCommand for i in analysis_info)) self.assertTrue(any(