Skip to content

Commit

Permalink
refactor(server): massStoreRun() as a background task
Browse files Browse the repository at this point in the history
Separate the previously blocking execution of `massStoreRun()`, which
was done in the context of the "API handler process", into a
"foreground" and a "background" part, exploiting the previously
implemented background task library support.

The foreground part remains executed in the context of the API handler
process, and deals with receiving and unpacking the to-be-stored data,
saving configuration and checking constraints that are cheap to check.
The foreground part can report issues synchronously to the client.

Everything else that was part of the previous `massStoreRun()` pipeline,
as implemented by the `mass_store_run.MassStoreRun` class becomes a
background task, such as the parsing of the uploaded reports and the
storing of data to the database.
This background task, implemented using the new library, executes in a
separate background worker process, and can not communicate directly
with the user.
Errors are logged to the `comments` fields.

The `massStoreRun()` API handler will continue to work as previously,
and block while waiting for the background task to terminate.
In case of an error, it synchronously reports a `RequestFailed` exception,
passing the `comments` field (into which the background process had
written the exception details) to the client.

Due to the inability for most of the exceptions previously caused in
`MassStoreRun` to "escape" as `RequestFailed`s, some parts of the API
had been deprecated and removed.
Namely, `ErrorCode.SOURCE_FILE` and `ErrorCode.REPORT_FORMAT` are no
longer sent over the API.
This does not break existing behaviour and does not cause an
incompatibility with clients: in cases where the request exceptions were
raised earlier, now a different type of exception is raised, but the
error message still precisely explains the problem as it did previously.
  • Loading branch information
whisperity committed Sep 18, 2024
1 parent d73c1da commit d42164b
Show file tree
Hide file tree
Showing 15 changed files with 1,313 additions and 948 deletions.
21 changes: 19 additions & 2 deletions codechecker_common/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@
import itertools
import json
import os
import pathlib
import random
from typing import TextIO
from typing import TextIO, Union

import portalocker

Expand Down Expand Up @@ -55,7 +56,10 @@ def chunks(iterator, n):
yield itertools.chain([first], rest_of_chunk)


def load_json(path: str, default=None, lock=False, display_warning=True):
def load_json(path: Union[str, pathlib.Path],
default=None,
lock=False,
display_warning=True):
"""
Load the contents of the given file as a JSON and return it's value,
or default if the file can't be loaded.
Expand Down Expand Up @@ -133,3 +137,16 @@ def generate_random_token(num_bytes: int = 32) -> str:
for _ in range(0, -(num_bytes // -64))])
idx = random.randrange(0, len(hash_value) - num_bytes + 1)
return hash_value[idx:(idx + num_bytes)]


def format_size(num: float, suffix: str = 'B') -> str:
"""
Pretty print storage units.
Source: http://stackoverflow.com/questions/1094841/
reusable-library-to-get-human-readable-version-of-file-size
"""
for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi', 'Yi', 'Ri']:
if abs(num) < 1024.0:
return f"{num:3.1f} {unit}{suffix}"
num /= 1024.0
return f"{num:.1f} Qi{suffix}"
37 changes: 29 additions & 8 deletions web/api/codechecker_api_shared.thrift
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,35 @@ enum Ternary {
}

enum ErrorCode {
DATABASE,
IOERROR,
GENERAL,
AUTH_DENIED, // Authentication denied. We do not allow access to the service.
UNAUTHORIZED, // Authorization denied. User does not have right to perform an action.
API_MISMATCH, // The client attempted to query an API version that is not supported by the server.
SOURCE_FILE, // The client sent a source code which contains errors (e.g., source code comment errors).
REPORT_FORMAT, // The client sent a report with wrong format (e.g., report annotation has bad type in a .plist).
// Any other sort of error encountered during RPC execution.
GENERAL = 2,

// Executing the request triggered a database-level fault, constraint violation.
DATABASE = 0,

// The request is malformed or an internal I/O operation failed.
IOERROR = 1,

// Authentication denied. We do not allow access to the service.
AUTH_DENIED = 3,

// User does not have the necessary rights to perform an action.
UNAUTHORIZED = 4,

// The client attempted to query an API version that is not supported by the
// server.
API_MISMATCH = 5,

// REMOVED IN API v6.59 (CodeChecker v6.25.0)!
// Previously sent by report_server.thrif/codeCheckerDBAccess::massStoreRun()
// when the client uploaded a source file which contained errors, such as
// review status source-code-comment errors.
/* SOURCE_FILE = 6, */ // Never reuse the value of the enum constant!

// REMOVED IN API v6.59 (CodeChecker v6.25.0)!
// Previously sent by report_server.thrif/codeCheckerDBAccess::massStoreRun()
// when the client uploaded a report with annotations that had invalid types.
/* REPORT_FORMAT = 7, */ // Never reuse the value of the enum constant!
}

exception RequestFailed {
Expand Down
Binary file modified web/api/js/codechecker-api-node/dist/codechecker-api-6.59.0.tgz
Binary file not shown.
Binary file modified web/api/py/codechecker_api/dist/codechecker_api.tar.gz
Binary file not shown.
Binary file not shown.
37 changes: 7 additions & 30 deletions web/client/codechecker_client/cmd/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
from typing import Dict, Iterable, List, Set, Tuple

from codechecker_api.codeCheckerDBAccess_v6.ttypes import StoreLimitKind
from codechecker_api_shared.ttypes import RequestFailed, ErrorCode

from codechecker_report_converter import twodim
from codechecker_report_converter.report import Report, report_file, \
Expand All @@ -58,15 +57,15 @@ def assemble_blame_info(_, __) -> int:
from codechecker_common.compatibility.multiprocessing import Pool
from codechecker_common.source_code_comment_handler import \
SourceCodeCommentHandler
from codechecker_common.util import load_json
from codechecker_common.util import format_size, load_json

from codechecker_web.shared import webserver_context, host_check
from codechecker_web.shared.env import get_default_workspace


LOG = logger.get_logger('system')

MAX_UPLOAD_SIZE = 1 * 1024 * 1024 * 1024 # 1GiB
MAX_UPLOAD_SIZE = 1024 ** 3 # 1024^3 = 1 GiB.


AnalyzerResultFileReports = Dict[str, List[Report]]
Expand All @@ -87,7 +86,7 @@ def assemble_blame_info(_, __) -> int:

"""Contains information about the report file after parsing.
store_it: True if every information is availabe and the
store_it: True if every information is available and the
report can be stored
main_report_positions: list of ReportLineInfo containing
the main report positions
Expand Down Expand Up @@ -135,19 +134,6 @@ def _write_summary(self, out=sys.stdout):
out.write("\n----=================----\n")


def sizeof_fmt(num, suffix='B'):
"""
Pretty print storage units.
Source: https://stackoverflow.com/questions/1094841/
reusable-library-to-get-human-readable-version-of-file-size
"""
for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi']:
if abs(num) < 1024.0:
return f"{num:3.1f}{unit}{suffix}"
num /= 1024.0
return f"{num:.1f}Yi{suffix}"


def get_file_content_hash(file_path):
"""
Return the file content hash for a file.
Expand Down Expand Up @@ -649,7 +635,7 @@ def assemble_zip(inputs,
compressed_zip_size = os.stat(zip_file).st_size

LOG.info("Compressing report zip file done (%s / %s).",
sizeof_fmt(zip_size), sizeof_fmt(compressed_zip_size))
format_size(zip_size), format_size(compressed_zip_size))

# We are responsible for deleting these.
shutil.rmtree(temp_dir)
Expand Down Expand Up @@ -698,7 +684,7 @@ def get_analysis_statistics(inputs, limits):

if os.stat(compilation_db).st_size > compilation_db_size:
LOG.debug("Compilation database is too big (max: %s).",
sizeof_fmt(compilation_db_size))
format_size(compilation_db_size))
else:
LOG.debug("Copying file '%s' to analyzer statistics "
"ZIP...", compilation_db)
Expand All @@ -721,7 +707,7 @@ def get_analysis_statistics(inputs, limits):
if failed_files_size > failure_zip_limit:
LOG.debug("We reached the limit of maximum uploadable "
"failure zip size (max: %s).",
sizeof_fmt(failure_zip_limit))
format_size(failure_zip_limit))
break
else:
LOG.debug("Copying failure zip file '%s' to analyzer "
Expand Down Expand Up @@ -932,7 +918,7 @@ def main(args):
zip_size = os.stat(zip_file).st_size
if zip_size > MAX_UPLOAD_SIZE:
LOG.error("The result list to upload is too big (max: %s): %s.",
sizeof_fmt(MAX_UPLOAD_SIZE), sizeof_fmt(zip_size))
format_size(MAX_UPLOAD_SIZE), format_size(zip_size))
sys.exit(1)

b64zip = ""
Expand Down Expand Up @@ -1005,15 +991,6 @@ def main(args):
storing_analysis_statistics(client, args.input, args.name)

LOG.info("Storage finished successfully.")
except RequestFailed as reqfail:
if reqfail.errorCode == ErrorCode.SOURCE_FILE:
header = ['File', 'Line', 'Checker name']
table = twodim.to_str(
'table', header, [c.split('|') for c in reqfail.extraInfo])
LOG.warning("Setting the review statuses for some reports failed "
"because of non valid source code comments: "
"%s\n %s", reqfail.message, table)
sys.exit(1)
except Exception as ex:
import traceback
traceback.print_exc()
Expand Down
6 changes: 5 additions & 1 deletion web/client/codechecker_client/thrift_call.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,11 @@ def wrapper(self, *args, **kwargs):
LOG.error(
'Client/server API mismatch\n %s', str(reqfailure.message))
else:
LOG.error('API call error: %s\n%s', func_name, str(reqfailure))
LOG.error("Error during API call: %s", func_name)
LOG.debug("%s", str(reqfailure))
LOG.error("%s", str(reqfailure.message))
if reqfailure.extraInfo:
LOG.error("%s", '\n'.join(reqfailure.extraInfo))
sys.exit(1)
except TApplicationException as ex:
LOG.error("Internal server error: %s", str(ex.message))
Expand Down
Loading

0 comments on commit d42164b

Please sign in to comment.