support/package-builder/Scheduler.py
87815216
 import threading
 from queue import PriorityQueue
620867de
 import json
326d5ca8
 from ThreadPool import ThreadPool
d024e640
 from constants import constants
 from Logger import Logger
45c9260c
 from SpecData import SPECS
c7c68b70
 from StringUtils import StringUtils
d024e640
 
d00518dd
 
 class DependencyGraphNode(object):
 
     def __init__(self, packageName, packageVersion, pkgWeight):
 
         self.packageName = packageName
         self.packageVersion = packageVersion
 
         self.buildRequiresPkgNodes = set() # Same as in spec file
         self.installRequiresPkgNodes = set() # Same as in spec file
 
         # Auxiliary build-requires packages.
         #
         # This is the result of "moving up" the (weak)
         # install-requires package dependencies to their ancestors
         # that actually need them as a build dependency (more details
         # in the code below). This is used to optimize the dependency
         # graph by reorganizing parent-child relationships based on
         # strong dependencies.
         self.auxBuildRequiresPkgNodes = set()
 
         # Accumulated install-requires packages.
         #
         # This is mostly used as a helper when building the graph
         # (specifically, as an intermediate step when computing
         # auxBuildRequiredPkgNodes), and is later unused.
         self.accumInstallRequiresPkgNodes = set()
 
         self.childPkgNodes = set() # Packages that I depend on.
         self.parentPkgNodes = set() # Packages that depend on me.
 
         self.selfWeight = pkgWeight # Own package weight.
 
         # Critical-chain-weight: The key scheduling metric.
         #
         # Weight of the critical chain that can be built starting from
         # this package. Higher the criticalChainWeight, more the
         # benefit from building this package as early as possible.
         self.criticalChainWeight = 0
 
         # Internal data-structure used to perform controlled
         # traversals of the dependency graph, as well as certain
         # sanity checks.
         self.numVisits = 0
 
 
d024e640
 class Scheduler(object):
87815216
 
     lock = threading.Lock()
326d5ca8
     listOfAlreadyBuiltPackages = set()
87815216
     listOfPackagesToBuild = []
326d5ca8
     listOfPackagesCurrentlyBuilding = set()
87815216
     sortedList = []
     listOfPackagesNextToBuild = PriorityQueue()
     listOfFailedPackages = []
620867de
     priorityMap = {}
87815216
     pkgWeights = {}
     logger = None
     event = None
     stopScheduling = False
d00518dd
     mapPackagesToGraphNodes = {}
87815216
 
d024e640
     @staticmethod
     def setEvent(event):
87815216
         Scheduler.event = event
 
d024e640
     @staticmethod
26b55679
     def setLog(logName, logPath, logLevel):
         Scheduler.logger = Logger.getLogger(logName, logPath, logLevel)
620867de
 
     @staticmethod
326d5ca8
     def setParams(sortedList, listOfAlreadyBuiltPackages):
         Scheduler.sortedList = sortedList
d00518dd
 
326d5ca8
         Scheduler.listOfAlreadyBuiltPackages = listOfAlreadyBuiltPackages
2f2be50d
 
         for pkg in Scheduler.sortedList:
             pkgName, pkgVersion = StringUtils.splitPackageNameAndVersion(pkg)
             if (pkg not in Scheduler.listOfAlreadyBuiltPackages
                or pkgName in constants.testForceRPMS):
                 Scheduler.listOfPackagesToBuild.append(pkg)
 
326d5ca8
         Scheduler.listOfPackagesCurrentlyBuilding = set()
         Scheduler.listOfPackagesNextToBuild = PriorityQueue()
         Scheduler.listOfFailedPackages = []
2f2be50d
 
         # When performing (only) make-check, package dependencies are
         # irrelevant; i.e., all the packages can be "make-checked" in
         # parallel. So skip building the dependency graph. This is not
         # merely an optimization! A given package can define
         # additional packages to be installed in its build environment
         # when performing a make-check, under %if %{with_check}.
         # However, these are not really build-time-dependencies in the
         # usual sense; i.e., there is no ordering requirement when
         # building these packages; they only make sense when running a
         # `make check`. Hence, trying to build a dependency graph out
         # of them will result in anomalies such as cycles in the
         # graph. So skip building the graph altogether and schedule
         # all the `make check`s in parallel.
         skipGraphBuild = constants.rpmCheck
         Scheduler._setPriorities(skipGraphBuild)
 
279800ea
         if constants.publishBuildDependencies:
             # This must be called only after calling _setPriorities(),
             # which builds the dependency graph.
             Scheduler._publishBuildDependencies()
 
326d5ca8
 
     @staticmethod
     def notifyPackageBuildCompleted(package):
         with Scheduler.lock:
             if package in Scheduler.listOfPackagesCurrentlyBuilding:
                 Scheduler.listOfPackagesCurrentlyBuilding.remove(package)
                 Scheduler.listOfAlreadyBuiltPackages.add(package)
 
     @staticmethod
     def notifyPackageBuildFailed(package):
         with Scheduler.lock:
             if package in Scheduler.listOfPackagesCurrentlyBuilding:
                 Scheduler.listOfPackagesCurrentlyBuilding.remove(package)
                 Scheduler.listOfFailedPackages.append(package)
 
     @staticmethod
     def isAllPackagesBuilt():
         if Scheduler.listOfPackagesToBuild:
             return False
         return True
 
     @staticmethod
     def isAnyPackagesFailedToBuild():
         if Scheduler.listOfFailedPackages:
             return True
         return False
 
     @staticmethod
     def getNextPackageToBuild():
         with Scheduler.lock:
             if Scheduler.stopScheduling:
                 return None
 
             if not Scheduler.listOfPackagesToBuild:
                 if Scheduler.event is not None:
                     Scheduler.event.set()
 
             if Scheduler.listOfPackagesNextToBuild.empty():
                 Scheduler._getListNextPackagesReadyToBuild()
 
             if Scheduler.listOfPackagesNextToBuild.empty():
                 return None
 
             packageTup = Scheduler.listOfPackagesNextToBuild.get()
 
             package = packageTup[1]
             if Scheduler.listOfPackagesNextToBuild.qsize() > 0:
                 ThreadPool.activateWorkerThreads(
                     Scheduler.listOfPackagesNextToBuild.qsize())
             Scheduler.listOfPackagesCurrentlyBuilding.add(package)
             Scheduler.listOfPackagesToBuild.remove(package)
             return package
 
     @staticmethod
f67fb5e3
     def getDoneList():
         return list(Scheduler.listOfAlreadyBuiltPackages)
 
d00518dd
 
f67fb5e3
     @staticmethod
279800ea
     def _publishBuildDependencies():
             Scheduler.logger.debug("Publishing Build dependencies")
             dependencyLists = {}
             for package in list(Scheduler.mapPackagesToGraphNodes.keys()):
                 dependencyLists[package] = []
                 pkgNode = Scheduler.mapPackagesToGraphNodes[package]
                 for childPkg in list(pkgNode.childPkgNodes):
                     dependencyLists[package].append(childPkg.packageName + "-" + childPkg.packageVersion)
             with open(str(constants.logPath) + "/BuildDependencies.json", 'w') as graphfile:
                 graphfile.write(json.dumps(dependencyLists, sort_keys=True, indent=4))
 
 
     @staticmethod
61f48156
     def __getRequiredTypePackages(pkg, requiresType):
         listRPMPackages = []
         if requiresType == "build":
             listRPMPackages.extend(SPECS.getData().getBuildRequiresForPkg(pkg))
         elif requiresType == "install":
             listRPMPackages.extend(SPECS.getData().getRequiresAllForPkg(pkg))
 
         # Remove duplicates.
         listRPMPackages = list(set(listRPMPackages))
620867de
 
61f48156
         listPackages = set()
620867de
 
61f48156
         for reqPkg in listRPMPackages:
f93ef2b0
             basePkg = SPECS.getData().getBasePkg(reqPkg)
61f48156
             listPackages.add(basePkg)
 
         return list(listPackages)
620867de
 
61f48156
 
     @staticmethod
     def _getBuildRequiredPackages(pkg):
         return Scheduler.__getRequiredTypePackages(pkg, "build")
 
 
     @staticmethod
     def _getRequiredPackages(pkg):
         return Scheduler.__getRequiredTypePackages(pkg, "install")
620867de
 
d00518dd
 
     def _createGraphNodes():
 
         # GRAPH-BUILD STEP 1: Initialize graph nodes for each package.
         #
         # Create a graph with a node to represent every package and all
         # its dependent packages in the given list.
         for package in Scheduler.sortedList:
             packageName, packageVersion = StringUtils.splitPackageNameAndVersion(package)
             node = DependencyGraphNode(packageName, packageVersion,
                                        Scheduler._getWeight(package))
             Scheduler.mapPackagesToGraphNodes[package] = node
 
         for package in Scheduler.sortedList:
             pkgNode = Scheduler.mapPackagesToGraphNodes[package]
             for childPackage in Scheduler._getBuildRequiredPackages(package):
                 childPkgNode = Scheduler.mapPackagesToGraphNodes[childPackage]
                 pkgNode.buildRequiresPkgNodes.add(childPkgNode)
 
             for childPackage in Scheduler._getRequiredPackages(package):
                 childPkgNode = Scheduler.mapPackagesToGraphNodes[childPackage]
                 pkgNode.installRequiresPkgNodes.add(childPkgNode)
 
         # GRAPH-BUILD STEP 2: Mark package dependencies in the graph.
         #
         # Add parent-child relationships between dependent packages.
         # If a package 'A' build-requires or install-requires package 'B', then:
         #   - Mark 'B' as a child of 'A' in the graph.
         #   - Mark 'A' as a parent of 'B' in the graph.
         #
         #                     A
         #
         #                  /     \
         #                 v       v
         #
         #                B         C
         #
         for package in Scheduler.sortedList:
             pkgNode = Scheduler.mapPackagesToGraphNodes[package]
             for childPkgNode in pkgNode.buildRequiresPkgNodes:
                 pkgNode.childPkgNodes.add(childPkgNode)
                 childPkgNode.parentPkgNodes.add(pkgNode)
 
             for childPkgNode in pkgNode.installRequiresPkgNodes:
                 pkgNode.childPkgNodes.add(childPkgNode)
                 childPkgNode.parentPkgNodes.add(pkgNode)
 
     def _optimizeGraph():
 
         # GRAPH-BUILD STEP 3: Convert weak (install-requires) dependencies
         #                     into strong (aux-build-requires) dependencies.
         #
         # Consider the following graph on the left, where package 'A'
         # install-requires 'B' and build-requires 'C'.  Package 'C'
         # install-requires 'D'. Package 'D' build-requires 'E' and
         # install-requires 'F'.
         #
         #  b     : build-requires dependency
         #  i     : install-requires dependency
         #  aux-b : auxiliary build-requires dependency (explained later)
         #
         # Now, we know that install-requires dependencies are weaker
         # than build-requires dependencies. That is, for example, in the
         # original graph below, package 'B' does not need to be built
         # before package 'A', but package 'C' must be built before
         # package 'A'.
         #
         # Using this knowledge, we optimize the graph by re-organizing
         # the dependencies such that all of them are strong (we call
         # these newly computed build-dependencies as "auxiliary build
         # dependencies"). The optimized graph for the example below is
         # presented on the right -- the key property of the optimized
         # graph is that every child package *MUST* be built before its
         # parent(s). This process helps relax package dependencies to
         # a great extent, by giving us the flexibility to delay
         # building certain packages until they are actually needed.
         # Another important benefit of this optimization is that it
         # nullifies certain dependencies altogether (eg: A->B), thereby
         # enabling a greater level of build-parallelism.
         #
         #      Original Graph                  Optimized Graph
         #                             +
         #          A                  |       B              A
         #                             +
         #       i / \ b               |                b/    |aux-b  \aux-b
         #        /   \                +                /     |        \
         #       v     v               |               v      v         v
         #                             +
         #      B        C             |              C       D          F
         #                             +
         #                \i           |                   b/
         #                 \           +                   /
         #                  v          |                  v
         #                             +
         #                  D          |                 E
         #                             +
         #                b/  \i       |
         #                /    \       +
         #               v      v      |
         #                             +
         #              E        F     |
         #
         #
         # In the code below, we use 'accumulated-install-requires' set
         # as a placeholder to bubble-up install-requires dependencies of
         # each package to all its ancestors. In each such path, we look
         # for the nearest ancestor that has a build-requires dependency
         # on that path going up from the given package to that ancestor.
         # If we find such an ancestor, we convert the bubbled-up
         # install-requires packages accumulated so far into the
         # auxiliary-build-requires set at that ancestor. (This is how
         # 'D' and 'F' become aux-build-requires of 'A' in the optimized
         # graph above).
         #
         # Graph Traversal : Bottom-up (starting with packages that
         #                   have no children).
         #
         nodesToVisit = set()
         for package in Scheduler.sortedList:
             pkgNode = Scheduler.mapPackagesToGraphNodes[package]
             if len(pkgNode.childPkgNodes) == 0:
                 nodesToVisit.add(pkgNode)
 
         while nodesToVisit:
             pkgNode = nodesToVisit.pop()
 
             pkgNode.accumInstallRequiresPkgNodes |= pkgNode.installRequiresPkgNodes
 
             if len(pkgNode.childPkgNodes) == 0:
                 # Count self-visit if you don't expect any other
                 # visitors.
                 pkgNode.numVisits += 1
 
             for parentPkgNode in pkgNode.parentPkgNodes:
                 if (pkgNode not in parentPkgNode.buildRequiresPkgNodes) and \
                    (pkgNode not in parentPkgNode.installRequiresPkgNodes):
                     raise Exception ("Visitor to parent is not its child " + \
                                      " Visitor: " + pkgNode.packageName + \
                                      " Parent:  " + parentPkgNode.packageName)
 
                 if pkgNode in parentPkgNode.buildRequiresPkgNodes:
                     parentPkgNode.auxBuildRequiresPkgNodes |= pkgNode.accumInstallRequiresPkgNodes
                 else:
                     parentPkgNode.accumInstallRequiresPkgNodes |= pkgNode.accumInstallRequiresPkgNodes
 
                 parentPkgNode.numVisits += 1
                 # Each child is expected to visit the parent once.
                 # Note that a package might have the same packages as
                 # both build-requires and install-requires children.
                 # They don't count twice.
                 numExpectedVisits = len(parentPkgNode.childPkgNodes)
                 if parentPkgNode.numVisits == numExpectedVisits:
                     nodesToVisit.add(parentPkgNode)
                 elif parentPkgNode.numVisits > numExpectedVisits:
                     raise Exception ("Parent node visit count > num of children " + \
                                      " Parent node: " + parentPkgNode.packageName + \
                                      " Visit count: " + str(parentPkgNode.numVisits) + \
                                      " Num of children: " + str(numExpectedVisits))
 
             pkgNode.accumInstallRequiresPkgNodes.clear()
 
         # Clear out the visit counter for reuse.
         for package in Scheduler.sortedList:
             pkgNode = Scheduler.mapPackagesToGraphNodes[package]
             if pkgNode.numVisits == 0:
                 raise Exception ("aux-build-requires calculation never visited " \
                                  "package " + pkgNode.packageName)
             else:
                 pkgNode.numVisits = 0
 
         # GRAPH-BUILD STEP 4: Re-organize the dependencies in the graph based on
         #                     the above optimization.
         #
         # Now re-arrange parent-child relationships between packages using the
         # following criteria:
         # If a package 'A' build-requires or aux-build-requires package 'B', then:
         #   - Mark 'B' as a child of 'A' in the graph.
         #   - Mark 'A' as a parent of 'B' in the graph.
         # If a package 'A' only install-requires package 'B', then:
         #   - Remove 'B' as a child of 'A' in the graph.
         #   - Remove 'A' as a parent of 'B' in the graph.
         # No node should have a non-zero accum-install-requires set.
 
         for package in Scheduler.sortedList:
             pkgNode = Scheduler.mapPackagesToGraphNodes[package]
             childPkgNodesToRemove = set()
             for childPkgNode in pkgNode.childPkgNodes:
                 if (childPkgNode not in pkgNode.buildRequiresPkgNodes) and \
                    (childPkgNode not in pkgNode.auxBuildRequiresPkgNodes):
                        # We can't modify a set during iteration, so we
                        # accumulate the set of children we want to
                        # remove, and delete them after the for-loop.
                        childPkgNodesToRemove.add(childPkgNode)
                        childPkgNode.parentPkgNodes.remove(pkgNode)
 
             pkgNode.childPkgNodes = pkgNode.childPkgNodes - \
                                     childPkgNodesToRemove
 
             for newChildPkgNode in pkgNode.auxBuildRequiresPkgNodes:
                 pkgNode.childPkgNodes.add(newChildPkgNode)
                 newChildPkgNode.parentPkgNodes.add(pkgNode)
 
 
     def _calculateCriticalChainWeights():
 
         # GRAPH-BUILD STEP 5: Calculate critical-chain-weight of packages.
         #
         # Calculation of critical-chain-weight (the key scheduling
         # metric):
         # --------------------------------------------------------
         # Let us define a "chain" of a given package to be the
         # sequence of parent packages that can be built starting from
         # that package. For example, if a package 'A' build-requires
         # 'B', which in turn build-requires 'C', then one of the
         # chains of 'C' is C->B->A. Now, if there are
         # multiple such chains possible from 'C', then we define the
         # "critical-chain" of 'C' to be the longest of those chains,
         # where "longest" is determined by the time it takes to build
         # all the packages in that chain. The build-times of any two
         # chains can be compared based on the sum of the
         # individual weights of each package in their respective
         # chains.
         #
         # Below, we calculate the critical-chain-weight of each
         # package (which is the maximum weight of all the paths
         # leading up to that package). Later on, we will schedule
         # package-builds by the decreasing order of the packages'
         # critical-chain-weight.
         #
         #
         #               ...  ...        ...
         #                 \   |         /
         #                  v  v        v
         #
         #                     A        B        C
         #
         #                      \       |       /
         #                       \      |      /
         #                        v     v     v
         #
         #                              D
         #
         #                            /
         #                           /
         #                          v
         #
         #                          E
         #
         #
         # In the above graph, the critical chain weight of 'D' is
         # computed as:
         # criticalChainWeight(D) = weight(D) +
         #                          max (criticalChainWeight(A),
         #                               criticalChainWeight(B),
         #                               weight(C))
         #
         # Graph Traversal : Top-down (starting with packages that
         #                   have no parents).
         #
         nodesToVisit = set()
         for package in Scheduler.sortedList:
             pkgNode = Scheduler.mapPackagesToGraphNodes[package]
             if len(pkgNode.parentPkgNodes) == 0:
                 nodesToVisit.add(pkgNode)
 
         while nodesToVisit:
             pkgNode = nodesToVisit.pop()
 
             if len(pkgNode.parentPkgNodes) == 0:
                 pkgNode.criticalChainWeight = pkgNode.selfWeight
                 # Count self-visit if you don't expect any other
                 # visitors.
                 pkgNode.numVisits += 1
 
             for childPkgNode in pkgNode.childPkgNodes:
                 if pkgNode not in childPkgNode.parentPkgNodes:
                     raise Exception ("Visitor to child node is not its parent " + \
                                      " Visitor: " + pkgNode.packageName + \
                                      " Child node: " + childPkgNode.packageName)
 
                 if childPkgNode.numVisits == len(childPkgNode.parentPkgNodes):
                     raise Exception ("Child node visit count > number of parents " + \
                                      " Child node: " + childPkgNode.packageName + \
                                      " Visit count: " + childPkgNode.numVisits + \
                                      " Num of parents: " + \
                                      str(len(childPkgNode.parentPkgNodes)))
 
                 childPkgNode.criticalChainWeight = max(
                     childPkgNode.criticalChainWeight,
                     pkgNode.criticalChainWeight + childPkgNode.selfWeight)
 
                 childPkgNode.numVisits += 1
                 # We can visit this package's children only after this
                 # package has been visited by all its parents (thus
                 # guaranteeing that its criticalChainWeight has
                 # stabilized).
                 if childPkgNode.numVisits == len(childPkgNode.parentPkgNodes):
                     nodesToVisit.add(childPkgNode)
 
         # Clear out the visit counter for reuse.
         for package in Scheduler.sortedList:
             pkgNode = Scheduler.mapPackagesToGraphNodes[package]
             if pkgNode.numVisits == 0:
                 raise Exception ("critical-chain-weight calculation never visited " + \
                                  "package " + pkgNode.packageName)
             else:
                 pkgNode.numVisits = 0
 
 
     def _buildGraph():
         Scheduler._createGraphNodes()
         Scheduler._optimizeGraph()
         Scheduler._calculateCriticalChainWeights()
 
 
620867de
     @staticmethod
326d5ca8
     def _parseWeights():
87815216
         Scheduler.pkgWeights.clear()
326d5ca8
         with open(constants.packageWeightsPath, 'r') as weightFile:
             Scheduler.pkgWeights = json.load(weightFile)
620867de
 
c7c68b70
     # A package's weight is an indicator of the time required to build
     # that package, relative to other packages. These weights do not
     # take build-time/install-time dependencies into account -- they
     # are the individual build-times of the respective packages.
     # Package weights are positive integers, with a default value of 1.
620867de
     @staticmethod
326d5ca8
     def _getWeight(package):
c7c68b70
 	# Package weights are assumed to be independent of package
 	# version (i.e., in the case of multi-version packages such as
 	# Go or Kubernetes, all the versions have the same weight). So
 	# convert packageName-version to packageName before looking up
 	# the package weight.
         package, _ = StringUtils.splitPackageNameAndVersion(package)
620867de
         try:
c7c68b70
             return int(Scheduler.pkgWeights[package]) + 1
620867de
         except KeyError:
c7c68b70
             return 1
620867de
 
     @staticmethod
98200cc1
     def _getPriority(package):
         try:
c7c68b70
             return int(Scheduler.priorityMap[package])
98200cc1
         except KeyError:
             return 0
 
 
     @staticmethod
2f2be50d
     def _setPriorities(skipGraphBuild):
         if skipGraphBuild:
             for package in Scheduler.sortedList:
                 Scheduler.priorityMap[package] = 0
         else:
             Scheduler._parseWeights()
             Scheduler._buildGraph()
 
             for package in Scheduler.sortedList:
                 pkgNode = Scheduler.mapPackagesToGraphNodes[package]
                 Scheduler.priorityMap[package] = pkgNode.criticalChainWeight
d00518dd
 
279800ea
         Scheduler.logger.debug("set Priorities: Priority of all packages")
         Scheduler.logger.debug(Scheduler.priorityMap)
620867de
 
510e37f4
 
d024e640
     @staticmethod
326d5ca8
     def _getListNextPackagesReadyToBuild():
d024e640
         for pkg in Scheduler.listOfPackagesToBuild:
             if pkg in Scheduler.listOfPackagesCurrentlyBuilding:
                 continue
59aec3bb
             listRequiredSubPackages = list(set(SPECS.getData().getBuildRequiresForPkg(pkg) + \
                                    SPECS.getData().getRequiresAllForPkg(pkg)))
 
             # extend to full Requires tree
             for p in listRequiredSubPackages:
                 reqs = SPECS.getData().getRequiresAllForPkg(p)
                 for r in reqs:
                     if r not in listRequiredSubPackages:
                         listRequiredSubPackages.append(r)
 
             # convert subpackages to basepkg
             listRequiredPackages = set()
             for p in listRequiredSubPackages:
                 listRequiredPackages.add(SPECS.getData().getBasePkg(p))
d00518dd
 
87815216
             canBuild = True
d024e640
             for reqPkg in listRequiredPackages:
                 if reqPkg not in Scheduler.listOfAlreadyBuiltPackages:
87815216
                     canBuild = False
d024e640
                     break
             if canBuild:
98200cc1
                 Scheduler.listOfPackagesNextToBuild.put((-Scheduler._getPriority(pkg), pkg))
26b55679
                 Scheduler.logger.debug("Adding " + pkg + " to the schedule list")