summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNicolas Dufresne <nicolas.dufresne@collabora.com>2020-04-02 17:42:05 -0400
committerNirbheek Chauhan <nirbheek@centricular.com>2020-09-01 13:56:42 +0530
commitd211542a900bb9f44254876ada7277e3ba78fdf3 (patch)
treeeb32c6a9ad72bb5450a9ccac272617b47f68c906
parent2aa6d2ad8ce17e0e9dd66517f1b9e47cfb30364d (diff)
cache: Re-implement cache on top of our external storage
The goal is to reduce egress on FDO server. In this patch, the cache is split from fetch.py into its own set of commands in cache.py. It now implements fetch-cache, gen-cache and upload-cache. upload-cache can only work if the SSH private key needed to upload has been set into CERBERO_PRIVATE_SSH_KEY environment or is available in your .ssh folder. The key will be made available through configuration in gstreamer/cerbero CI configuration and only available to protected branches on the gstreamer namespace. Backported-by: Nirbheek Chauhan <nirbheek@centricular.com>
-rw-r--r--cerbero/commands/cache.py275
-rw-r--r--cerbero/commands/fetch.py126
2 files changed, 275 insertions, 126 deletions
diff --git a/cerbero/commands/cache.py b/cerbero/commands/cache.py
new file mode 100644
index 00000000..f5b87366
--- /dev/null
+++ b/cerbero/commands/cache.py
@@ -0,0 +1,275 @@
+# cerbero - a multi-platform build system for Open Source software
+# Copyright (C) 2020 Nicolas Dufresne <nicolas.dufresne@collabora.com>
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Library General Public
+# License as published by the Free Software Foundation; either
+# version 2 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Library General Public License for more details.
+#
+# You should have received a copy of the GNU Library General Public
+# License along with this library; if not, write to the
+# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+# Boston, MA 02111-1307, USA.
+
+import os, sys
+import json
+import tempfile
+import shutil
+from hashlib import sha256
+
+from cerbero.commands import Command, register_command
+from cerbero.errors import FatalError
+from cerbero.utils import _, N_, ArgparseArgument, git, shell
+from cerbero.utils import messages as m
+from cerbero.config import Distro
+
class BaseCache(Command):
    """Common plumbing for the build-cache commands.

    Provides the remote storage URLs, the on-disk file names, checksumming
    and retrieval of the remote deps log shared by fetch-cache, gen-cache
    and upload-cache.
    """
    base_url = 'https://artifacts.gstreamer-foundation.net/cerbero-deps/%s/%s/%s'
    ssh_address = 'cerbero-deps-uploader@artifacts.gstreamer-foundation.net'
    build_dir = '/builds/%s/cerbero/cerbero-build'
    deps_filename = 'cerbero-deps.tar.xz'
    log_filename = 'cerbero-deps.log'
    # Maximum number of entries kept in the remote deps log.
    log_size = 10

    def __init__(self, args=None):
        # A mutable default argument ([]) would be shared across all
        # instances and grow on every instantiation; use None instead.
        args = [] if args is None else args
        args.append(ArgparseArgument('--commit', action='store', type=str,
            default='HEAD', help=_('the commit to pick artifact from')))
        args.append(ArgparseArgument('--branch', action='store', type=str,
            default='master', help=_('Git branch to search from')))
        Command.__init__(self, args)

    # FIXME: move this to utils
    def checksum(self, fname):
        """Return the hex SHA-256 digest of the file *fname*."""
        h = sha256()
        with open(fname, 'rb') as f:
            # Read in chunks of 512k till f.read() returns b'' instead of
            # reading the whole file at once, which would fail on systems
            # with low memory
            for block in iter(lambda: f.read(512 * 1024), b''):
                h.update(block)
        return h.hexdigest()

    def get_git_sha(self, args):
        """Resolve args.commit to a full git hash of this cerbero checkout."""
        git_dir = os.path.abspath(os.path.dirname(sys.argv[0]))
        return git.get_hash(git_dir, args.commit)

    def json_get(self, url):
        """Download *url* and return its JSON-decoded contents.

        Raises FatalError (from shell.download) if the download fails.
        """
        m.message("GET %s" % url)

        tmpdir = tempfile.mkdtemp()
        try:
            tmpfile = os.path.join(tmpdir, 'deps.json')
            shell.download(url, destination=tmpfile)
            with open(tmpfile, 'r') as f:
                resp = f.read()
        finally:
            # Remove the tempdir even when the download raises; the previous
            # version leaked it on error.
            shutil.rmtree(tmpdir)

        return json.loads(resp)

    def get_distro_and_arch(self, config):
        """Map the target config to the (distro, arch) URL path components."""
        distro = config.target_distro
        arch = config.target_arch
        if distro == Distro.REDHAT:
            distro = 'fedora'
        if distro == Distro.OS_X:
            distro = 'macos'
        if config.cross_compiling():
            distro = 'cross-' + distro
        return distro, arch

    def make_url(self, config, args, filename):
        """Build the full artifact URL for *filename* on the current target."""
        branch = args.branch
        distro, arch = self.get_distro_and_arch(config)
        base_url = self.base_url % (branch, distro, arch)
        return "%s/%s" % (base_url, filename)

    def get_deps(self, config, args):
        """Fetch the remote deps log; return [] when it cannot be retrieved."""
        url = self.make_url(config, args, self.log_filename)
        deps = []

        try:
            deps = self.json_get(url)
        except FatalError as e:
            m.warning("Could not get cache list: %s" % e.msg)

        return deps

    def get_deps_filename(self, config):
        """Local path of the dependencies tarball."""
        return os.path.join(config.home_dir, self.deps_filename)

    def get_log_filename(self, config):
        """Local path of the deps log file."""
        return os.path.join(config.home_dir, self.log_filename)

    def run(self, config, args):
        # The cache layout relies on paths relative to an uninstalled setup.
        # Use self.name so gen-cache/upload-cache report the right command
        # (the previous version always said "fetch-cache").
        if not config.uninstalled:
            raise FatalError(_("%s is only available with "
                "cerbero-uninstalled" % self.name))
+
class FetchCache(BaseCache):
    doc = N_('Fetch a cached build from external storage based on cerbero git '
             'revision.')
    name = 'fetch-cache'

    def __init__(self, args=None):
        # Avoid the shared mutable default; a literal [] default is mutated
        # by append() and leaks arguments across instantiations.
        args = [] if args is None else args
        args.append(ArgparseArgument('--namespace', action='store', type=str,
            default='gstreamer', help=_('GitLab namespace to search from')))
        BaseCache.__init__(self, args)

    def find_dep(self, deps, sha):
        """Return the deps-log entry matching commit *sha*, or None."""
        for dep in deps:
            if dep['commit'] == sha:
                return dep

        m.warning("Did not find cache for commit %s" % sha)
        return None

    def fetch_dep(self, config, dep, namespace):
        """Download, verify, unpack and relocate the cached dependencies.

        On checksum mismatch the archive is discarded and nothing is
        unpacked or relocated (previously the relocation step still ran
        on a corrupted download).
        """
        try:
            dep_path = os.path.join(config.home_dir, os.path.basename(dep['url']))
            shell.download(dep['url'], dep_path, check_cert=True, overwrite=True)
            if dep['checksum'] != self.checksum(dep_path):
                m.warning("Corrupted dependency file, ignoring.")
                os.remove(dep_path)
                return
            shell.unpack(dep_path, config.home_dir)
            os.remove(dep_path)

            # The tarball was created under the CI build root; rewrite all
            # absolute paths in the unpacked files to this cerbero home.
            origin = self.build_dir % namespace
            m.message("Relocating from %s to %s" % (origin, config.home_dir))
            # FIXME: Just a quick hack for now
            shell.call(("grep -lnrIU %(origin)s | xargs "
                        "sed \"s#%(origin)s#%(dest)s#g\" -i") % {
                            'origin': origin, 'dest': config.home_dir},
                       config.home_dir)
        except FatalError as e:
            m.warning("Could not retrieve dependencies for commit %s: %s" % (
                dep['commit'], e.msg))

    def run(self, config, args):
        BaseCache.run(self, config, args)

        sha = self.get_git_sha(args)
        deps = self.get_deps(config, args)
        dep = self.find_dep(deps, sha)
        if dep:
            self.fetch_dep(config, dep, args.namespace)
+
class GenCache(BaseCache):
    doc = N_('Generate build cache from current state.')
    name = 'gen-cache'

    def __init__(self, args=None):
        # None default instead of a shared mutable [] (appended to later
        # in BaseCache.__init__).
        args = [] if args is None else args
        BaseCache.__init__(self, args)

    def gen_dep(self, config, args, deps, sha):
        """Create the deps tarball and an updated deps log for commit *sha*."""
        deps_filename = self.get_deps_filename(config)
        if os.path.exists(deps_filename):
            os.remove(deps_filename)

        log_filename = self.get_log_filename(config)
        if os.path.exists(log_filename):
            os.remove(log_filename)

        # Workaround special mangling for windows hidden in the config
        arch = os.path.basename(config.sources)
        try:
            shell.new_call(
                ['tar',
                 '-C', config.home_dir,
                 '--use-compress-program=xz --threads=0',
                 '--exclude=var/tmp',
                 '-cf', deps_filename,
                 'build-tools',
                 config.build_tools_cache,
                 os.path.join('dist', arch),
                 config.cache_file])
            url = self.make_url(config, args, '%s-%s' % (sha, self.deps_filename))
            # Newest entry first; keep at most log_size entries.
            deps.insert(0, {'commit': sha, 'checksum': self.checksum(deps_filename), 'url': url})
            deps = deps[0:self.log_size]
            with open(log_filename, 'w') as outfile:
                json.dump(deps, outfile, indent=1)
        except FatalError:
            # Clean up partial outputs. Guard with exists() so the cleanup
            # itself cannot raise FileNotFoundError (e.g. when tar failed
            # before creating the tarball) and mask the original error.
            if os.path.exists(deps_filename):
                os.remove(deps_filename)
            if os.path.exists(log_filename):
                os.remove(log_filename)
            raise

    def run(self, config, args):
        BaseCache.run(self, config, args)

        sha = self.get_git_sha(args)
        deps = self.get_deps(config, args)
        self.gen_dep(config, args, deps, sha)
+
class UploadCache(BaseCache):
    # Fixed typo: was 'Build build cache to external storage.'
    doc = N_('Upload build cache to external storage.')
    name = 'upload-cache'

    def __init__(self, args=None):
        # None default instead of a shared mutable [] (appended to later
        # in BaseCache.__init__).
        args = [] if args is None else args
        BaseCache.__init__(self, args)

    def upload_dep(self, config, args, deps):
        """Upload the generated tarball and deps log over SSH/SCP.

        Requires gen-cache to have been run first. The SSH private key is
        taken from CERBERO_PRIVATE_SSH_KEY when set, otherwise whatever is
        available in the user's .ssh folder is used.
        """
        sha = self.get_git_sha(args)
        for dep in deps:
            if dep['commit'] == sha:
                m.message('Cache already uploaded for this commit.')
                return

        tmpdir = tempfile.mkdtemp()
        private_key = os.getenv('CERBERO_PRIVATE_SSH_KEY')
        private_key_path = os.path.join(tmpdir, 'id_rsa')

        deps_filename = self.get_deps_filename(config)
        log_filename = self.get_log_filename(config)
        if not os.path.exists(deps_filename) or not os.path.exists(log_filename):
            raise FatalError(_('gen-cache must be run before running upload-cache.'))

        try:
            # Setup temporary private key from env
            ssh_opt = ['-o', 'StrictHostKeyChecking=no']
            if private_key:
                # O_CREAT with 0o600 keeps the key readable only by us.
                with os.fdopen(os.open(private_key_path, os.O_WRONLY | os.O_CREAT, 0o600), 'w') as f:
                    f.write(private_key)
                    f.write("\n")
                ssh_opt += ['-i', private_key_path]
            ssh_cmd = ['ssh'] + ssh_opt + [self.ssh_address]
            scp_cmd = ['scp'] + ssh_opt

            # Ensure directory structure is in place
            branch = args.branch
            distro, arch = self.get_distro_and_arch(config)
            base_dir = os.path.join(branch, distro, arch)
            shell.new_call(ssh_cmd + ['mkdir -p %s' % base_dir ])

            # Upload the deps files first
            remote_deps_filename = os.path.join(base_dir, '%s-%s' % (sha, self.deps_filename))
            shell.new_call(scp_cmd + [deps_filename, '%s:%s' % (self.ssh_address, remote_deps_filename)])

            # Upload the new log
            remote_tmp_log_filename = os.path.join(base_dir, '%s-%s' % (sha, self.log_filename))
            shell.new_call(scp_cmd + [log_filename,
                '%s:%s' % (self.ssh_address, remote_tmp_log_filename)])

            # Override the new log in a way that we reduce the risk of corrupted
            # fetch.
            remote_log_filename = os.path.join(base_dir, self.log_filename)
            shell.new_call(ssh_cmd + ['mv', '-f', remote_tmp_log_filename, remote_log_filename])

            # Now remove the obsoleted dep file if needed
            for dep in deps[self.log_size - 1:]:
                old_remote_deps_filename = os.path.join(base_dir, os.path.basename(dep['url']))
                shell.new_call(ssh_cmd + ['rm', '-f', old_remote_deps_filename])
        finally:
            # Always scrub the temporary key material.
            shutil.rmtree(tmpdir)

    def run(self, config, args):
        BaseCache.run(self, config, args)
        deps = self.get_deps(config, args)
        self.upload_dep(config, args, deps)
+
+register_command(FetchCache)
+register_command(GenCache)
+register_command(UploadCache)
diff --git a/cerbero/commands/fetch.py b/cerbero/commands/fetch.py
index b0114363..45fd613d 100644
--- a/cerbero/commands/fetch.py
+++ b/cerbero/commands/fetch.py
@@ -16,12 +16,7 @@
# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
# Boston, MA 02111-1307, USA.
-import os, sys
-import urllib
-import json
import asyncio
-import tempfile
-import shutil
from cerbero.commands import Command, register_command
from cerbero.build.cookbook import CookBook
@@ -30,7 +25,6 @@ from cerbero.packages.packagesstore import PackagesStore
from cerbero.utils import _, N_, ArgparseArgument, remove_list_duplicates, git, shell
from cerbero.utils import messages as m
from cerbero.build.source import Tarball
-from cerbero.config import Distro
class Fetch(Command):
@@ -134,125 +128,5 @@ class FetchPackage(Fetch):
args.deps, args.reset_rdeps, args.full_reset,
args.print_only)
-class FetchCache(Command):
- doc = N_('Fetch a cached build from GitLab CI based on cerbero git '
- 'revision.')
- name = 'fetch-cache'
-
- base_url = 'https://gitlab.freedesktop.org/%s/cerbero/-/jobs'
- build_dir = '/builds/%s/cerbero/cerbero-build'
- log_size = 50
-
- def __init__(self, args=[]):
- args.append(ArgparseArgument('--commit', action='store', type=str,
- default='HEAD', help=_('the commit to pick artifact from')))
- args.append(ArgparseArgument('--namespace', action='store', type=str,
- default='gstreamer', help=_('GitLab namespace to search from')))
- args.append(ArgparseArgument('--branch', action='store', type=str,
- default='master', help=_('Git branch to search from')))
- args.append(ArgparseArgument('--job-id', action='store', type=str,
- default='master', help=_('Artifact job id, this will skip'
- ' commit matching')))
- args.append(ArgparseArgument('--skip-fetch', action='store_true',
- default=False, help=_('Skip fetching cached build, the '
- 'commit/url log will be updated if --job-id is present')))
- Command.__init__(self, args)
-
- def request(self, url, values, token=None):
- headers = {}
- if token:
- headers = {"Private-Token": token}
-
- data = urllib.parse.urlencode(values)
- url = "%s?%s" % (url, data)
-
- m.message("GET %s" % url)
-
- tmpdir = tempfile.mkdtemp()
- tmpfile = os.path.join(tmpdir, 'deps.json')
-
- try:
- shell.download(url, destination=tmpfile)
- except urllib.error.URLError as e:
- raise FatalError(_(e.reason))
-
- with open(tmpfile, 'r') as f:
- resp = f.read()
- shutil.rmtree(tmpdir)
-
- return json.loads(resp)
-
- def get_deps(self, config, args):
- namespace = args.namespace
- branch = args.branch
- distro = config.target_distro
- arch = config.target_arch
- if distro == Distro.REDHAT:
- distro = 'fedora'
- if distro == Distro.OS_X:
- distro = 'macos'
-
- base_url = self.base_url % namespace
- url = "%s/artifacts/%s/raw/cerbero-build/cerbero-deps.log" % (base_url, branch)
-
- deps = []
- try:
- deps = self.request(url, values = {
- 'job': "cerbero deps %s %s" % (distro, arch)
- })
- except FatalError as e:
- m.warning("Could not get cache list: %s" % e.msg)
-
- return deps
-
- def find_dep(self, deps, sha):
- for dep in deps:
- if dep['commit'] == sha:
- return dep
-
- m.warning("Did not find cache for commit %s" % sha)
- return None
-
- def fetch_dep(self, config, dep, namespace):
- try:
- artifacts_path = "%s/cerbero-deps.tar.gz" % config.home_dir
- shell.download(dep['url'], artifacts_path, check_cert=True, overwrite=True)
- shell.unpack(artifacts_path, config.home_dir)
- os.remove(artifacts_path)
- origin = self.build_dir % namespace
- m.message("Relocating from %s to %s" % (origin, config.home_dir))
- # FIXME: Just a quick hack for now
- shell.call(("grep -lnrIU %(origin)s | xargs "
- "sed \"s#%(origin)s#%(dest)s#g\" -i") % {
- 'origin': origin, 'dest': config.home_dir},
- config.home_dir)
- except FatalError as e:
- m.warning(("Could not retrieve artifact for commit %s (the artifact "
- "may have expired): %s") % (dep['commit'], e.msg))
-
- def update_log(self, config, args, deps, sha):
- base_url = self.base_url % args.namespace
- url = "%s/%s/artifacts/raw/cerbero-deps.tar.gz" % (base_url, args.job_id)
- deps.insert(0, {'commit': sha, 'url': url})
- deps = deps[0:self.log_size]
- with open("%s/cerbero-deps.log" % config.home_dir, 'w') as outfile:
- json.dump(deps, outfile, indent=1)
-
- def run(self, config, args):
- if not config.uninstalled:
- raise FatalError(_("fetch-cache is only available with "
- "cerbero-uninstalled"))
-
- git_dir = os.path.abspath(os.path.dirname(sys.argv[0]))
- sha = git.get_hash(git_dir, args.commit)
- deps = self.get_deps(config, args)
- if not args.skip_fetch:
- dep = self.find_dep(deps, sha)
- if dep:
- self.fetch_dep(config, dep, args.namespace)
- if args.job_id:
- self.update_log(config, args, deps, sha)
-
register_command(FetchRecipes)
register_command(FetchPackage)
-register_command(FetchCache)