[svn] commit: r228 - /branches/f2f200910/src/bin/bind10/bind10.py
BIND 10 source code commits
bind10-changes at lists.isc.org
Fri Oct 30 16:42:16 UTC 2009
Author: shane
Date: Fri Oct 30 16:42:16 2009
New Revision: 228
Log:
Add class to track process information.
Restart killed processes.
Modified:
branches/f2f200910/src/bin/bind10/bind10.py
Modified: branches/f2f200910/src/bin/bind10/bind10.py
==============================================================================
--- branches/f2f200910/src/bin/bind10/bind10.py (original)
+++ branches/f2f200910/src/bin/bind10/bind10.py Fri Oct 30 16:42:16 2009
@@ -25,15 +25,39 @@
import re
import errno
import time
+import select
from optparse import OptionParser, OptionValueError
import ISC.CC
# This is the version that gets displayed to the user.
-__version__ = "v20091028 (Paving the DNS Parking Lot)"
+__version__ = "v20091030 (Paving the DNS Parking Lot)"
# Nothing at all to do with the 1990-12-10 article here:
# http://www.subgenius.com/subg-digest/v2/0056.html
+
+class ProcessInfo:
+ """Information about a process"""
+
+ dev_null = open("/dev/null", "w")
+
+ def _spawn(self):
+ self.process = subprocess.Popen(self.args,
+ stdin=subprocess.PIPE,
+ stdout=self.dev_null,
+ stderr=self.dev_null,
+ close_fds=True,
+ env=self.env,)
+ self.pid = self.process.pid
+
+ def __init__(self, name, args, env={}):
+ self.name = name
+ self.args = args
+ self.env = env
+ self._spawn()
+
+ def respawn(self):
+ self._spawn()
class BoB:
"""Boss of BIND class."""
@@ -47,11 +71,10 @@
"""
self.verbose = True
self.c_channel_port = c_channel_port
- self.cc_process = None
self.cc_session = None
self.processes = {}
self.dead_processes = {}
- self.component_processes = {}
+ self.runnable = False
def startup(self):
"""Start the BoB instance.
@@ -59,23 +82,17 @@
Returns None if successful, otherwise an string describing the
problem.
"""
- dev_null = open("/dev/null", "w")
# start the c-channel daemon
if self.verbose:
sys.stdout.write("Starting msgq using port %d\n" % self.c_channel_port)
c_channel_env = { "ISC_MSGQ_PORT": str(self.c_channel_port), }
try:
- c_channel = subprocess.Popen("msgq",
- stdin=subprocess.PIPE,
- stdout=dev_null,
- stderr=dev_null,
- close_fds=True,
- env=c_channel_env,)
+ c_channel = ProcessInfo("msgq", "msgq", c_channel_env)
except:
return "Unable to start msgq"
self.processes[c_channel.pid] = c_channel
if self.verbose:
- sys.stdout.write("Started msgq with PID %d\n" % c_channel.pid)
+ sys.stdout.write("Started msgq (PID %d)\n" % c_channel.pid)
# now connect to the c-channel
cc_connect_start = time.time()
@@ -95,42 +112,30 @@
if self.verbose:
sys.stdout.write("Starting bind-cfgd\n")
try:
- bind_cfgd = subprocess.Popen("bind-cfgd",
- stdin=dev_null,
- stdout=dev_null,
- stderr=dev_null,
- close_fds=True,
- env={},)
+ bind_cfgd = ProcessInfo("bind-cfgd", "bind-cfgd")
except:
- c_channel.kill()
+ c_channel.process.kill()
return "Unable to start bind-cfgd"
self.processes[bind_cfgd.pid] = bind_cfgd
if self.verbose:
- sys.stdout.write("Started bind-cfgd with PID %d\n" % bind_cfgd.pid)
+ sys.stdout.write("Started bind-cfgd (PID %d)\n" % bind_cfgd.pid)
# start the parking lot
# XXX: this must be read from the configuration manager in the future
# XXX: we hardcode port 5300
if self.verbose:
- sys.stdout.write("Starting parkinglot\n")
- try:
- parkinglot = subprocess.Popen(["parkinglot", "-p", "5300",],
- stdin=dev_null,
- stdout=dev_null,
- stderr=dev_null,
- close_fds=True,
- env={},)
+ sys.stdout.write("Starting parkinglot on port 5300\n")
+ try:
+ parkinglot = ProcessInfo("parkinglot", ["parkinglot", "-p", "5300"])
except:
c_channel.kill()
bind_cfgd.kill()
return "Unable to start parkinglot"
self.processes[parkinglot.pid] = parkinglot
if self.verbose:
- sys.stdout.write("Started parkinglot with PID %d\n" % parkinglot.pid)
-
- # remember our super-important process
- self.cc_process = c_channel
-
+ sys.stdout.write("Started parkinglot (PID %d)\n" % parkinglot.pid)
+
+ self.runnable = True
return None
def stop_all_processes(self):
@@ -147,36 +152,37 @@
if self.verbose:
sys.stdout.write("Stopping the server.\n")
# first try using the BIND 10 request to stop
- if self.cc_session:
- try:
- self.stop_all_processes()
- except:
- pass
+ try:
+ self.stop_all_processes()
+ except:
+ pass
time.sleep(0.1) # XXX: some delay probably useful... how much is uncertain
# next try sending a SIGTERM
processes_to_stop = list(self.processes.values())
unstopped_processes = []
- for process in processes_to_stop:
+ for proc_info in processes_to_stop:
if self.verbose:
- sys.stdout.write("Sending SIGTERM to process %d.\n" % process.pid)
- try:
- process.terminate()
+ sys.stdout.write("Sending SIGTERM to %s (PID %d).\n" %
+ (proc_info.name, proc_info.pid))
+ try:
+ proc_info.process.terminate()
except OSError as o:
# ignore these (usually ESRCH because the child
# finally exited)
pass
time.sleep(0.1) # XXX: some delay probably useful... how much is uncertain
- for process in processes_to_stop:
- (pid, exit_status) = os.waitpid(process.pid, os.WNOHANG)
+ for proc_info in processes_to_stop:
+ (pid, exit_status) = os.waitpid(proc_info.pid, os.WNOHANG)
if pid == 0:
- unstopped_processes.append(process)
+ unstopped_processes.append(proc_info)
# finally, send a SIGKILL (unmaskable termination)
processes_to_stop = unstopped_processes
- for process in processes_to_stop:
+ for proc_info in processes_to_stop:
if self.verbose:
- sys.stdout.write("Sending SIGKILL to process %d.\n" % process.pid)
- try:
- process.kill()
+ sys.stdout.write("Sending SIGKILL to %s (PID %d).\n" %
+ (proc_info.name, proc_info.pid))
+ try:
+ proc_info.process.kill()
except OSError as o:
# ignore these (usually ESRCH because the child
# finally exited)
@@ -192,16 +198,41 @@
Returns True if everything is okay, or False if a fatal error
has been detected and the program should exit.
"""
- process = self.processes.pop(pid)
- self.dead_processes[process.pid] = process
- if self.verbose:
- sys.stdout.write("Process %d died.\n" % pid)
- if self.cc_process and (pid == self.cc_process.pid):
+ proc_info = self.processes.pop(pid)
+ self.dead_processes[proc_info.pid] = proc_info
+ if self.verbose:
+ sys.stdout.write("Process %s (PID %d) died.\n" %
+ (proc_info.name, proc_info.pid))
+ if proc_info.name == "msgq":
if self.verbose:
sys.stdout.write("The msgq process died, shutting down.\n")
- return False
- else:
- return True
+ self.runnable = False
+
+ def recv_and_process_cc_msg(self):
+ """Receive and process the next message on the c-channel,
+ if any."""
+ routing, data = self.cc_session.group_recvmsg(False)
+ print("routing", routing)
+ print("data", data)
+
+ def restart_processes(self):
+ """Restart any dead processes."""
+ # XXX: this needs a back-off algorithm
+ still_dead = {}
+ for proc_info in self.dead_processes.values():
+ if self.verbose:
+ sys.stdout.write("Resurrecting dead %s process...\n" %
+ proc_info.name)
+ try:
+ proc_info.respawn()
+ self.processes[proc_info.pid] = proc_info
+ if self.verbose:
+ sys.stdout.write("Resurrected %s (PID %d)\n" %
+ (proc_info.name, proc_info.pid))
+ except:
+ still_dead[proc_info.pid] = proc_info
+ # remember any processes that refuse to be resurrected
+ self.dead_processes = still_dead
if __name__ == "__main__":
def reaper(signal_number, stack_frame):
@@ -214,13 +245,9 @@
if o.errno == errno.ECHILD: break
raise
if pid == 0: break
- if not boss_of_bind.reap(pid, exit_status):
- signal.signal(signal.SIGCHLD, signal.SIG_DFL)
- boss_of_bind.shutdown()
- sys.exit(0)
+ if boss_of_bind:
+ boss_of_bind.reap(pid, exit_status)
-
-
def get_signame(signal_number):
"""Return the symbolic name for a signal."""
for sig in dir(signal):
@@ -232,14 +259,11 @@
# XXX: perhaps register atexit() function and invoke that instead
def fatal_signal(signal_number, stack_frame):
"""We need to exit (SIGINT or SIGTERM received)."""
- global boss_of_bind
global options
if options.verbose:
sys.stdout.write("Received %s.\n" % get_signame(signal_number))
signal.signal(signal.SIGCHLD, signal.SIG_DFL)
- if boss_of_bind:
- boss_of_bind.shutdown()
- sys.exit(0)
+ boss_of_bind.runnable = False
def check_port(option, opt_str, value, parser):
"""Function to insure that the port we are passed is actually
@@ -265,6 +289,10 @@
# http://code.google.com/p/procname/
# http://github.com/lericson/procname/
+ # Create wakeup pipe for signal handlers
+ wakeup_pipe = os.pipe()
+ signal.set_wakeup_fd(wakeup_pipe[1])
+
# Set signal handlers for catching child termination, as well
# as our own demise.
signal.signal(signal.SIGCHLD, reaper)
@@ -279,6 +307,36 @@
sys.stderr.write("Error on startup: %s\n" % startup_result)
sys.exit(1)
- while True:
- time.sleep(1)
-
+ # In our main loop, we check for dead processes or messages
+ # on the c-channel.
+ event_poller = select.poll()
+ wakeup_fd = wakeup_pipe[0]
+ event_poller.register(wakeup_fd, select.POLLIN)
+ cc_fd = boss_of_bind.cc_session._socket.fileno()
+ event_poller.register(cc_fd, select.POLLIN)
+ while boss_of_bind.runnable:
+ # XXX: get time for next restart for poll
+
+ # poll() can raise EINTR when a signal arrives,
+ # even if they are resumable, so we have to catch
+ # the exception
+ try:
+ events = event_poller.poll()
+ except select.error as err:
+ if err.args[0] == errno.EINTR:
+ events = []
+ else:
+ sys.stderr.write("Error with poll(); %s\n" % err)
+ break
+
+ for (fd, event) in events:
+ if fd == cc_fd:
+ boss_of_bind.recv_and_process_cc_msg()
+ elif fd == wakeup_fd:
+ os.read(wakeup_fd, 32)
+
+ boss_of_bind.restart_processes()
+
+ # shutdown
+ signal.signal(signal.SIGCHLD, signal.SIG_DFL)
+ boss_of_bind.shutdown()
More information about the bind10-changes
mailing list