# Monitor and control Apache web server workers from Python.
#
# Author: Peter Odding <peter@peterodding.com>
# Last Change: March 28, 2019
# URL: https://apache-manager.readthedocs.io
"""The :mod:`apache_manager` module defines the core logic of the Apache manager."""
# Standard library modules.
import logging
import os
import re
# External dependencies.
from bs4 import BeautifulSoup
from humanfriendly import compact, concatenate, format_size, format_timespan, pluralize, Timer
from proc.apache import find_apache_memory_usage, find_apache_workers
from proc.core import Process
from property_manager import (
PropertyManager,
cached_property,
lazy_property,
mutable_property,
required_property,
writable_property,
)
from six.moves.urllib.error import HTTPError
from six.moves.urllib.request import urlopen
# Modules included in our package.
from apache_manager.exceptions import AddressDiscoveryError, StatusPageError
# Semi-standard module versioning.
__version__ = '1.2'

# Restrict the public API (and the generated API documentation) to the
# identifiers listed here; everything else is considered internal.
__all__ = (
    # Configuration defaults.
    'HANGING_WORKER_THRESHOLD',
    'IDLE_MODES',
    'NATIVE_WORKERS_LABEL',
    'PORTS_CONF',
    'STATUS_COLUMNS',
    # Public classes.
    'ApacheManager',
    'KillableWorker',
    'NetworkAddress',
    'NonNativeWorker',
    'WorkerStatus',
)
PORTS_CONF = '/etc/apache2/ports.conf'
"""
Default for :attr:`~ApacheManager.ports_config`: the absolute pathname of the
configuration file that defines the port(s) that Apache listens on (a string).
The location is based on Debian's Apache 2 packaging.
"""

STATUS_COLUMNS = (
    'Srv',
    'PID',
    'Acc',
    'M',
    'CPU',
    'SS',
    'Req',
    'Conn',
    'Child',
    'Slot',
    'Client',
    'VHost',
    'Request',
)
"""
The column names that are expected in the worker status table of the Apache
status page (an iterable of strings).
"""

IDLE_MODES = (
    '_',
    'I',
    '.',
)
"""
The worker modes that are considered idle (a tuple of strings). Refer to
:attr:`WorkerStatus.is_idle`.
"""

NATIVE_WORKERS_LABEL = 'native'
"""
The label that identifies native Apache workers in exported metrics (a
string). It's used by :func:`ApacheManager.save_metrics()` to distinguish
native Apache workers from WSGI process groups.
"""

HANGING_WORKER_THRESHOLD = 5 * 60
"""
The number of seconds after which an active worker is considered 'hanging' (a
number). Refer to :attr:`ApacheManager.hanging_workers`.
"""

# Initialize a logger for this module.
logger = logging.getLogger(__name__)
class ApacheManager(PropertyManager):

    """
    Apache web server manager.

    Most of the computed properties on this class are cached to avoid repeated
    expensive computations (refer to :class:`~property_manager.cached_property`
    for details). The easiest way to invalidate all of these cached properties
    at once is to call the :func:`refresh()` method.
    """

    def __init__(self, *args, **kw):
        """
        Initialize a :class:`ApacheManager` object.

        :param args: The first positional argument is used to set
                     :attr:`ports_config`.
        :param kw: Any keyword arguments are passed on to the initializer of
                   the superclass.
        """
        if args:
            args = list(args)
            kw['ports_config'] = args.pop(0)
        super(ApacheManager, self).__init__(*args, **kw)

    @writable_property
    def num_killed_active(self):
        """The number of active workers killed by :func:`kill_workers()` (an integer)."""
        return 0

    @writable_property
    def num_killed_idle(self):
        """The number of idle workers killed by :func:`kill_workers()` (an integer)."""
        return 0

    @writable_property
    def status_response(self):
        """
        Whether the status page was fetched successfully by :func:`fetch_status_page()` (a boolean).

        This will be :data:`None` as long as :func:`fetch_status_page()`
        hasn't been called.
        """
        return None

    @mutable_property
    def ports_config(self):
        """
        The absolute pathname of the ``ports.conf`` configuration file (a string).

        The configuration file is expected to define the port(s) that Apache
        listens on. Defaults to :data:`PORTS_CONF`.
        """
        return PORTS_CONF

    @cached_property
    def listen_addresses(self):
        """
        The network address(es) where Apache is listening (a list of :class:`NetworkAddress` objects).

        :raises: :exc:`.AddressDiscoveryError` when discovery fails (e.g. because
                 ``/etc/apache2/ports.conf`` is missing or can't be parsed).

        Here's an example:

        >>> from apache_manager import ApacheManager
        >>> manager = ApacheManager()
        >>> manager.listen_addresses
        [NetworkAddress(protocol='http',
                        address='127.0.0.1',
                        port=81,
                        url='http://127.0.0.1:81')]
        """
        logger.debug("Discovering where Apache is listening by parsing %s ..", self.ports_config)
        # Make sure the configuration file exists.
        if not os.path.isfile(self.ports_config):
            raise AddressDiscoveryError(compact("""
                Failed to discover any addresses or ports that Apache is
                listening on! The configuration file {filename} is missing. Are
                you sure the Apache web server is properly installed? If so
                you'll have to specify the configuration's location.
            """, filename=self.ports_config))
        # Parse the configuration file.
        matched_addresses = []
        pattern = re.compile(r'^(.+):(\d+)$')
        with open(self.ports_config) as handle:
            for lnum, line in enumerate(handle, start=1):
                tokens = line.split()
                # We are looking for `Listen' directives.
                if len(tokens) >= 2 and tokens[0] == 'Listen':
                    parsed_value = None
                    # Check for a port number without an IP address.
                    if tokens[1].isdigit():
                        parsed_value = NetworkAddress(port=int(tokens[1]))
                    else:
                        # Check for an IP address with a port number.
                        match = pattern.match(tokens[1])
                        if match:
                            address = match.group(1)
                            port = int(match.group(2))
                            # The wildcard address can't be connected to, so
                            # the loopback address is used instead.
                            if address == '0.0.0.0':
                                address = '127.0.0.1'
                            parsed_value = NetworkAddress(address=address, port=port)
                    # Check if we have a match.
                    if parsed_value is not None:
                        # Override the protocol if necessary.
                        if len(tokens) >= 3:
                            parsed_value.protocol = tokens[2]
                        logger.debug("Parsed listen directive on line %i: %s", lnum, parsed_value)
                        matched_addresses.append(parsed_value)
                    else:
                        logger.warning("Failed to parse listen directive on line %i: %s", lnum, line)
        # Sanity check the results.
        if not matched_addresses:
            raise AddressDiscoveryError(compact("""
                Failed to discover any addresses or ports that Apache is
                listening on! Maybe I'm parsing the wrong configuration file?
                ({filename})
            """, filename=self.ports_config))
        # Log and return the addresses in the order in which they were found.
        logger.debug("Discovered %s that Apache is listening on: %s",
                     pluralize(len(matched_addresses), "address", "addresses"),
                     concatenate(map(str, matched_addresses)))
        return matched_addresses

    @cached_property(writable=True)
    def html_status_url(self):
        """
        The URL on which Apache's HTML status page can be retrieved (a string).

        :raises: Any exceptions raised by :attr:`listen_addresses`.

        The URL is based on the first entry in :attr:`listen_addresses`.
        Here's an example:

        >>> from apache_manager import ApacheManager
        >>> manager = ApacheManager()
        >>> manager.html_status_url
        'http://127.0.0.1:80/server-status'
        """
        status_url = "%s/server-status" % self.listen_addresses[0].url
        logger.debug("Discovered Apache HTML status page URL: %s", status_url)
        return status_url

    @cached_property
    def text_status_url(self):
        """
        The URL on which Apache's plain text status page can be retrieved (a string).

        :raises: Any exceptions raised by :attr:`listen_addresses`.

        The URL is derived from :attr:`html_status_url`. Here's an example:

        >>> from apache_manager import ApacheManager
        >>> manager = ApacheManager()
        >>> manager.text_status_url
        'http://127.0.0.1:80/server-status?auto'
        """
        status_url = "%s?auto" % self.html_status_url
        logger.debug("Discovered Apache plain text status page URL: %s", status_url)
        return status_url

    @cached_property
    def html_status(self):
        """
        The content of Apache's `HTML status page`_ (the raw response body).

        :raises: Any exceptions raised by :func:`fetch_status_page()`.

        See also :attr:`text_status`.

        .. _HTML status page: http://httpd.apache.org/docs/trunk/mod/mod_status.html
        """
        return self.fetch_status_page(self.html_status_url)

    @cached_property
    def text_status(self):
        """
        The content of Apache's `plain text status page`_ (a string).

        :raises: Any exceptions raised by :func:`fetch_status_page()`.

        See also :attr:`html_status`. Here's an example:

        >>> from apache_manager import ApacheManager
        >>> manager = ApacheManager()
        >>> print(manager.text_status)
        Total Accesses: 100
        Total kBytes: 275
        CPULoad: .000203794
        Uptime: 181556
        ReqPerSec: .000550794
        BytesPerSec: 1.55104
        BytesPerReq: 2816
        BusyWorkers: 1
        IdleWorkers: 5
        Scoreboard: ____W._.......................................

        .. _plain text status page: http://httpd.apache.org/docs/trunk/mod/mod_status.html#machinereadable
        """
        return self.fetch_status_page(self.text_status_url).decode()

    def fetch_status_page(self, status_url):
        """
        Fetch an Apache status page and return its content.

        :param status_url: The URL of the status page (a string).
        :returns: The response body (as returned by the ``read()`` method of
                  the response object).
        :raises: :exc:`.StatusPageError` if the HTTP response status isn't
                 200. Other exceptions raised while opening the URL (e.g.
                 connection failures) are propagated unchanged.

        As a side effect :attr:`status_response` is set to :data:`True` when
        the page was fetched successfully and to :data:`False` when opening
        the URL failed or an unexpected HTTP status was received.
        """
        timer = Timer()
        # Get the Apache status page.
        logger.debug("Fetching Apache status page from %s ..", status_url)
        try:
            response = urlopen(status_url)
        except HTTPError as e:
            # These objects can be treated as response objects.
            response = e
        except Exception:
            # Record the failure but leave the exception type untouched so
            # that existing callers keep working.
            self.status_response = False
            raise
        try:
            # Validate the HTTP response status.
            response_code = response.getcode()
            if response_code != 200:
                # Record the failure.
                self.status_response = False
                # Notify the caller using a custom exception.
                raise StatusPageError(compact("""
                    Failed to retrieve Apache status page from {url}! Expected to
                    get HTTP response status 200, got {code} instead.
                """, url=status_url, code=response_code))
            response_body = response.read()
        finally:
            # Always release the underlying connection.
            response.close()
        logger.debug("Fetched %s in %s.", format_size(len(response_body)), timer)
        self.status_response = True
        return response_body

    @cached_property
    def slots(self):
        """
        The status of Apache workers (a list of :class:`WorkerStatus` objects).

        :raises: Any exceptions raised by :attr:`html_status` or
                 :exc:`.StatusPageError` if parsing of the Apache status page
                 fails.

        The :attr:`slots` property contains one :class:`WorkerStatus` object
        for each worker "slot" that Apache has allocated. This means that some
        of the :class:`WorkerStatus` objects may not have expected properties
        like :attr:`~WorkerStatus.pid` because they describe an "empty slot".
        See the :attr:`workers` property for a list of :class:`WorkerStatus`
        objects without empty slots.
        """
        # Use BeautifulSoup to parse the HTML response body.
        soup = BeautifulSoup(self.html_status, "html.parser")
        # Prepare a list of normalized column headings expected to be defined in the table.
        required_columns = [normalize_text(c) for c in STATUS_COLUMNS]
        # Check each table on the Apache status page, because different
        # multiprocessing modules result in a status page with a different
        # number of tables and the table with worker details is not clearly
        # marked as such in the HTML output ...
        for table in soup.findAll('table'):
            # Parse the table into a list of dictionaries, one for each row.
            matched_rows = list(parse_status_table(table))
            # Filter out rows that don't contain the required columns.
            validated_rows = [r for r in matched_rows if all(c in r for c in required_columns)]
            # If one or more rows remain we found the right table! :-)
            if validated_rows:
                return [WorkerStatus(status_fields=f) for f in validated_rows]
        raise StatusPageError(compact("""
            Failed to parse Apache status page! No tables found containing all
            of the required column headings and at least one row of data that
            could be parsed.
        """))

    @cached_property
    def workers(self):
        """
        The status of the Apache workers, a list of :class:`WorkerStatus` objects.

        :raises: Any exceptions raised by :attr:`html_status` or
                 :exc:`.StatusPageError` if parsing of the Apache status page
                 fails.

        This property's value is based on :attr:`slots` but excludes empty
        slots (i.e. every :class:`WorkerStatus` object in :attr:`workers` will
        have expected properties like :attr:`~WorkerStatus.pid`).
        """
        return [ws for ws in self.slots if ws.m != '.']

    @cached_property
    def hanging_workers(self):
        """
        A list of workers that appear to be 'hanging' (unresponsive).

        :raises: Any exceptions raised by :attr:`html_status` or
                 :exc:`.StatusPageError` if parsing of the Apache status page
                 fails.

        This property's value is based on :attr:`workers` but excludes workers
        that aren't active and workers whose 'seconds since the beginning of
        the current request' is lower than :data:`HANGING_WORKER_THRESHOLD`.
        Workers whose :attr:`~WorkerStatus.ss` value couldn't be parsed (it's
        :data:`None`) are excluded as well.
        """
        return [
            ws for ws in self.workers
            if ws.is_active and ws.ss is not None and ws.ss >= HANGING_WORKER_THRESHOLD
        ]

    @cached_property
    def killable_workers(self):
        """
        A list of :class:`KillableWorker` objects, sorted by process ID.

        The list contains the native workers from :attr:`workers` plus a
        :class:`NonNativeWorker` object for each process reported by
        :func:`proc.apache.find_apache_workers()` whose process ID isn't
        already covered by a native worker.
        """
        all_workers = list(self.workers)
        native_pids = set(w.pid for w in all_workers)
        for process in find_apache_workers():
            if process.pid not in native_pids:
                all_workers.append(NonNativeWorker(process=process))
        return sorted(all_workers, key=lambda p: p.pid)

    @property
    def manager_metrics(self):
        """
        Information about the interaction between the Apache manager and the Apache web server.

        Here's an example of the resulting dictionary:

        >>> from apache_manager import ApacheManager
        >>> from pprint import pprint
        >>> manager = ApacheManager()
        >>> pprint(manager.manager_metrics)
        {'workers_hanging': 0,
         'workers_killed_active': 0,
         'workers_killed_idle': 0,
         'status_response': None}

        Notes about these metrics:

        - The ``status_response`` key is :data:`None` by default. Once an
          Apache status page has been fetched it becomes :data:`True` if the
          status page was fetched successfully or :data:`False` if fetching of
          the status page failed (see :func:`fetch_status_page()`,
          :attr:`html_status` and :attr:`text_status`).
        - The ``workers_hanging`` key gives the number of hanging workers
          (based on the length of :attr:`hanging_workers`).
        - The ``workers_killed_active`` and ``workers_killed_idle`` keys give
          the number of Apache workers killed by :func:`kill_workers()`.
        """
        return dict(workers_hanging=len(self.hanging_workers),
                    workers_killed_active=self.num_killed_active,
                    workers_killed_idle=self.num_killed_idle,
                    status_response=self.status_response)

    @cached_property
    def server_metrics(self):
        """
        Global web server metrics parsed from the machine readable plain text status page.

        Here's an example of the values you can expect:

        >>> from apache_manager import ApacheManager
        >>> from pprint import pprint
        >>> manager = ApacheManager()
        >>> pprint(manager.server_metrics)
        {'busy_workers': 1,
         'bytes_per_request': 3120.19,
         'bytes_per_second': 1.52158,
         'cpu_load': 0.000195063,
         'idle_workers': 4,
         'requests_per_second': 0.000487657,
         'total_accesses': 85,
         'total_traffic': 259,
         'uptime': 174303}
        """
        logger.debug("Extracting metrics from Apache's plain text status page ..")
        return dict(
            # Example: "Total Accesses: 49038"
            total_accesses=int(self.extract_metric(r'^Total Accesses: (\d+)')),
            # Example: "Total kBytes: 169318" (converted from kilobytes to bytes).
            total_traffic=int(self.extract_metric(r'^Total KBytes: (\d+)')) * 1024,
            # Example: "CPULoad: 7.03642"
            cpu_load=float(self.extract_metric(r'^CPULoad: ([0-9.]+)')),
            # Example: "Uptime: 85017"
            uptime=int(self.extract_metric(r'^Uptime: (\d+)')),
            # Example: "ReqPerSec: .576802"
            requests_per_second=float(self.extract_metric(r'^ReqPerSec: ([0-9.]+)')),
            # Example: "BytesPerSec: 2039.38"
            bytes_per_second=float(self.extract_metric(r'^BytesPerSec: ([0-9.]+)')),
            # Example: "BytesPerReq: 3535.66"
            bytes_per_request=float(self.extract_metric(r'^BytesPerReq: ([0-9.]+)')),
            # Example: "BusyWorkers: 2"
            busy_workers=int(self.extract_metric(r'^BusyWorkers: (\d+)')),
            # Example: "IdleWorkers: 6"
            idle_workers=int(self.extract_metric(r'^IdleWorkers: (\d+)')),
        )

    def extract_metric(self, pattern, default='0'):
        """
        Extract a metric from the plain text status page (:attr:`text_status`).

        :param pattern: A regular expression with one capture group that
                        captures the value of the metric (a string).
        :param default: The value to return when the pattern doesn't match
                        (a string, defaults to ``'0'``).
        :returns: The text captured by the first group of the pattern or
                  the default value.

        Whitespace in the pattern matches any run of whitespace and matching
        is case insensitive and line based, which is why the pattern
        ``^Total KBytes: (\\d+)`` matches the line ``Total kBytes: 169318``.

        NOTE(review): :attr:`server_metrics` calls this helper but no
        definition was present in this class; the matching rules above were
        chosen to fit the patterns and examples in :attr:`server_metrics`
        and should be confirmed against the upstream implementation.
        """
        flexible_pattern = re.sub(r'\s+', lambda match: r'\s+', pattern)
        match = re.search(flexible_pattern, self.text_status, re.IGNORECASE | re.MULTILINE)
        if match:
            logger.debug("Pattern %r matched %r.", pattern, match.group(0))
            return match.group(1)
        logger.warning("Pattern %r didn't match plain text Apache status page contents!", pattern)
        return default

    @cached_property
    def memory_usage(self):
        """
        The memory usage of the Apache workers (a :class:`~proc.apache.StatsList` object).

        Based on :func:`proc.apache.find_apache_memory_usage()`. See also
        :attr:`wsgi_process_groups`.

        Here's an example:

        >>> from apache_manager import ApacheManager
        >>> from pprint import pprint
        >>> manager = ApacheManager()
        >>> pprint(manager.memory_usage)
        [13697024, 466776064, 735391744, 180432896, 465453056]
        >>> print(manager.memory_usage.min)
        13697024
        >>> print(manager.memory_usage.average)
        141787428.571
        >>> print(manager.memory_usage.max)
        735391744
        """
        return self.combined_memory_usage[0]

    @cached_property
    def wsgi_process_groups(self):
        """
        The memory usage of Apache workers in WSGI process groups.

        The value of this property is a dictionary with process group names as
        keys and :class:`~proc.apache.StatsList` objects as values.

        Based on :func:`proc.apache.find_apache_memory_usage()`. See also
        :attr:`memory_usage`.

        Here's an example:

        >>> from apache_manager import ApacheManager
        >>> from pprint import pprint
        >>> manager = ApacheManager()
        >>> pprint(manager.wsgi_process_groups)
        {'group-one': [44048384, 44724224, 44048384],
         'group-two': [52088832, 51879936, 55554048, 54956032, 54968320],
         'other-group': [13697024, 13697024, 13697024, 13697024]}
        """
        return self.combined_memory_usage[1]

    @cached_property
    def combined_memory_usage(self):
        """
        The result of :func:`~proc.apache.find_apache_memory_usage()`.

        This property caches the result so that when :attr:`memory_usage` and
        :attr:`wsgi_process_groups` are both dereferenced, the function
        :func:`~proc.apache.find_apache_memory_usage()` only has to be called
        once.
        """
        return find_apache_memory_usage()

    def kill_workers(self, max_memory_active=0, max_memory_idle=0, timeout=0, dry_run=False):
        """
        Kill Apache worker processes that exceed resource usage thresholds.

        :param max_memory_active: The maximum number of bytes of memory that
                                  active Apache worker processes are allowed to
                                  use (an integer).
        :param max_memory_idle: The maximum number of bytes of memory that
                                idle Apache worker processes are allowed to use
                                (an integer).
        :param timeout: The maximum number of seconds since the beginning of
                        the most recent request (a number).
        :param dry_run: :data:`True` to select and report the workers that
                        would be killed without actually killing them,
                        :data:`False` (the default) to really kill them.
        :returns: A list of integers with process ids of killed workers.

        Some implementation notes about this method:

        - If any of the parameters are zero the respective resource usage
          threshold will not be applied.
        - Memory usage is measured using :attr:`~KillableWorker.memory_usage`.
          Workers whose memory usage is unknown (:data:`None`) never exceed
          the memory usage thresholds.
        - The number of seconds since the beginning of the most recent request
          is measured using :attr:`WorkerStatus.ss` (an unknown value is
          treated as zero).
        - Worker processes are killed using the
          :func:`~proc.core.Process.kill()` method of the
          :class:`proc.core.Process` class. Workers whose process has already
          disappeared are skipped.

        See also :attr:`num_killed_active` and :attr:`num_killed_idle`.
        """
        killed = set()
        num_checked = 0
        for worker in self.killable_workers:
            # Depending on the multiprocessing module in use multiple workers
            # may be using the same OS process. We leave it up to the caller
            # whether it's wise to kill workers using non-preforked processes
            # (hint: it's not) but we definitely shouldn't try to kill a single
            # OS process more than once!
            if worker.pid not in killed:
                kill_worker = False
                memory_usage = worker.memory_usage
                memory_usage_threshold = max_memory_active if worker.is_active else max_memory_idle
                # Non-native workers have no `ss' attribute and native workers
                # report None when the value couldn't be parsed.
                seconds_busy = getattr(worker, 'ss', None) or 0
                if (memory_usage_threshold and memory_usage is not None
                        and memory_usage > memory_usage_threshold):
                    logger.info("Killing %s using %s (%s) ..",
                                worker, format_size(memory_usage),
                                worker.request or 'last request unknown')
                    kill_worker = True
                elif timeout and worker.is_active and seconds_busy > timeout:
                    logger.info("Killing %s hanging for %s since last request (%s) ..",
                                worker, format_timespan(seconds_busy),
                                worker.request or 'unknown')
                    kill_worker = True
                if kill_worker:
                    process = worker.process
                    if process is None:
                        # The process disappeared after the status page was
                        # fetched, so there's nothing left to kill.
                        logger.debug("Skipping %s because its process has disappeared.", worker)
                    else:
                        if not dry_run:
                            process.kill()
                        killed.add(worker.pid)
                        if worker.is_active:
                            self.num_killed_active += 1
                        else:
                            self.num_killed_idle += 1
            # Every inspected worker is counted, whether it was killed or not.
            num_checked += 1
        if killed:
            logger.info("Killed %i of %s.", len(killed), pluralize(num_checked, "Apache worker"))
        else:
            logger.info("No Apache workers killed (found %s within resource usage limits).",
                        pluralize(num_checked, "worker"))
        return list(killed)

    def save_metrics(self, data_file):
        """
        Store monitoring metrics in a data file.

        :param data_file: The pathname of the data file (a string). The
                          special value ``-`` means the metrics are printed
                          on standard output instead.

        This method stores the metrics provided by :attr:`manager_metrics` and
        :attr:`server_metrics` in a text file in an easy to parse format.
        Here's an example of what the contents of the file look like::

            # Global Apache server metrics.
            busy-workers           1
            bytes-per-request      0.0
            bytes-per-second       0.0
            cpu-load               1.13893
            idle-workers           4
            requests-per-second    1.89822
            total-accesses         15
            total-traffic          0
            uptime                 790212

            # Metrics internal to apache-manager.
            status-response        0
            workers-killed-active  0
            workers-killed-idle    0

            # Memory usage of native Apache worker processes.
            memory-usage    native   count    5
            memory-usage    native   min      331776
            memory-usage    native   max      1662976
            memory-usage    native   average  598016.0
            memory-usage    native   median   331776

            # Memory usage of 'example' WSGI worker processes.
            memory-usage    example  count    4
            memory-usage    example  min      356352
            memory-usage    example  max      372736
            memory-usage    example  average  368640.0
            memory-usage    example  median   372736.0

        The values in the example above have been aligned to ease readability;
        in reality the names and values are delimited by tab characters (as
        long as you parse the file as whitespace delimited name/value pairs it
        will be fine, this is trivial to do with e.g. AWK_).

        Boolean metrics are written as ``0`` (for :data:`True`) or ``1`` (for
        :data:`False`). The data file is first written to a temporary file
        (the pathname with a ``.tmp`` suffix) which is then renamed into
        place.

        .. _AWK: https://en.wikipedia.org/wiki/AWK
        """
        if data_file == '-':
            logger.debug("Reporting metrics on standard output ..")
        else:
            logger.debug("Storing metrics in %s ..", data_file)
        # Start with the server metrics.
        output = ['# Global Apache server metrics.']
        for name, value in sorted(self.server_metrics.items()):
            output.append('%s\t%s' % (name.replace('_', '-'), value))
        # Add our internal metrics.
        output.extend(['', '# Metrics internal to apache-manager.'])
        for name, value in sorted(self.manager_metrics.items()):
            if isinstance(value, bool):
                value = 0 if value else 1
            output.append('%s\t%s' % (name.replace('_', '-'), value))
        # Add memory usage metrics per group of (WSGI) workers.
        groups = dict(self.wsgi_process_groups)
        # The native workers are always reported before the WSGI process groups.
        ordered_group_names = [NATIVE_WORKERS_LABEL] + sorted(groups.keys())
        groups[NATIVE_WORKERS_LABEL] = self.memory_usage
        metric_names = ('count', 'min', 'max', 'average', 'median')
        for group_name in ordered_group_names:
            output.append('')
            if group_name == NATIVE_WORKERS_LABEL:
                output.append('# Memory usage of native Apache worker processes.')
            else:
                output.append('# Memory usage of %r WSGI worker processes.' % group_name)
            for metric in metric_names:
                output.append('\t'.join([
                    'memory-usage', group_name, metric, str(
                        len(groups[group_name]) if metric == 'count'
                        else getattr(groups[group_name], metric)
                    ),
                ]))
        if data_file == '-':
            print('\n'.join(output))
        else:
            # Write to a temporary file and rename it into place so that
            # readers never observe a partially written data file.
            temporary_file = '%s.tmp' % data_file
            with open(temporary_file, 'w') as handle:
                handle.write('\n'.join(output) + '\n')
            os.rename(temporary_file, data_file)

    def refresh(self):
        """Clear cached properties so that their values are recomputed when dereferenced."""
        self.clear_cached_properties()
class NetworkAddress(PropertyManager):

    """Network address objects encapsulate everything we need to know to connect to Apache."""

    @property
    def url(self):
        """
        The URL corresponding to :attr:`protocol`, :attr:`address` and :attr:`port` (a string).

        The port number is omitted from the URL when it is the default port
        of the protocol (80 for ``http``, 443 for ``https``).
        """
        tokens = [self.protocol, '://', self.address]
        if not ((self.protocol == 'http' and self.port == 80) or
                (self.protocol == 'https' and self.port == 443)):
            tokens.append(':%s' % self.port)
        return ''.join(tokens)

    @required_property
    def protocol(self):
        """
        The protocol that Apache is listening for (one of the strings 'http' or 'https').

        Defaults to 'https' when :attr:`port` is 443 and to 'http' otherwise.
        """
        return 'https' if self.port == 443 else 'http'

    @required_property
    def address(self):
        """The IP address on which Apache is listening (a string, defaults to '127.0.0.1')."""
        return '127.0.0.1'

    @required_property
    def port(self):
        """The port number on which Apache is listening (an integer)."""

    def __str__(self):
        """Use :attr:`url` for a human friendly representation."""
        return self.url
class KillableWorker(PropertyManager):

    """
    Abstract base class to represent killable Apache worker processes.

    Worker processes can be killed based on resource usage thresholds like
    memory usage and/or requests that are taking too long to process. There
    are currently two implementations of killable workers:

    - :class:`WorkerStatus` represents the information about a worker process
      that was retrieved from Apache's status page.

    - :class:`NonNativeWorker` represents processes that are direct descendants
      of the master Apache process but are not included in the workers listed
      on Apache's status page (e.g. WSGI daemon processes spawned by
      mod_wsgi_).
    """

    @required_property
    def is_active(self):
        """:data:`True` if the worker is processing a request, :data:`False` otherwise."""

    @property
    def is_alive(self):
        """:data:`True` if :attr:`process` is running, :data:`False` otherwise."""
        return self.process.is_alive if self.process else False

    @lazy_property
    def memory_usage(self):
        """
        The memory usage of the worker process in bytes.

        The value of this property is an integer or :data:`None` (if the
        process disappeared before the process information is requested).

        The value of this property is based on the
        :attr:`~proc.core.Process.rss` property of the
        :class:`proc.core.Process` class.
        """
        return self.process.rss if self.process else None

    @required_property
    def pid(self):
        """
        The process ID of the Apache worker (an integer or :data:`None`).

        If :attr:`process` is set then the value of :attr:`pid` defaults to
        :attr:`proc.core.Process.pid`.
        """
        return self.process.pid if self.process else None

    @mutable_property(cached=True)
    def process(self):
        """
        The :class:`proc.core.Process` object for this worker process (or :data:`None`).

        If :attr:`pid` is set then the value of :attr:`process` defaults to the
        result of :func:`proc.core.Process.from_pid()`. If the worker process
        disappears before the process information is requested :attr:`process`
        will be :data:`None`.
        """
        return Process.from_pid(self.pid) if self.pid else None

    @mutable_property
    def request(self):
        """The HTTP status line of the most recent request (a string or :data:`None`)."""
class NonNativeWorker(KillableWorker):

    """
    Non-native Apache worker processes.

    Objects of this type represent processes that are direct descendants of the
    master Apache process but are not included in the workers listed on
    Apache's status page (e.g. WSGI daemon processes spawned by mod_wsgi_).

    These processes (assumed to be workers of one kind or another) can only be
    killed based on their memory usage, because this information can be easily
    retrieved from the Linux ``/proc`` file system without an API provided by
    the Apache web server (because there is no such API for non-native
    workers, to the best of my knowledge).

    .. _mod_wsgi: https://code.google.com/p/modwsgi/
    """

    @required_property
    def process(self):
        """The :class:`proc.core.Process` object for this worker process."""

    @required_property
    def is_active(self):
        """:data:`True` because this information isn't available for non-native workers."""
        return True

    def __str__(self):
        """Render a human friendly representation of a non-native Apache worker."""
        return "non-native worker %i" % self.pid
class WorkerStatus(KillableWorker):

    """
    :class:`WorkerStatus` objects represent the state of an Apache worker.

    These objects are constructed by the :attr:`ApacheManager.workers`
    property. To give you an idea of what :class:`WorkerStatus` objects look
    like, here's a simple example:

    >>> from apache_manager import ApacheManager
    >>> manager = ApacheManager()
    >>> print(manager.workers[0])
    WorkerStatus(acc=(0, 6, 128),
                 child=0.01,
                 conn=0.0,
                 cpu=0.03,
                 is_active=False,
                 is_alive=True,
                 is_idle=True,
                 m='_',
                 memory_usage=5185536,
                 pid=31212,
                 process=Process(...),
                 req=1,
                 request='GET /server-status HTTP/1.1',
                 slot=0.2,
                 srv=(0, 38),
                 ss=234)

    The naming of the fields may look somewhat obscure, this is because they
    match the names given on the Apache status page. If any of the fields are
    not available their value will be :data:`None`.

    Most properties are parsed from :attr:`status_fields` (the row of the
    Apache status page that describes the worker). The properties
    :attr:`is_idle` and :attr:`is_active` are computed from :attr:`m`.
    """

    @required_property
    def status_fields(self):
        """The raw status fields extracted from Apache's status page (a dictionary)."""

    @lazy_property
    def acc(self):
        """The number of accesses this connection / this child / this slot (a tuple of three integers)."""
        raw_value = self.status_fields.get('acc', '0/0/0')
        return tuple(coerce_value(int, n) for n in raw_value.split('/'))

    @lazy_property
    def child(self):
        """The number of megabytes transferred this child (a float)."""
        return coerce_value(float, self.status_fields.get('child', '0'))

    @lazy_property
    def client(self):
        """The IP address of the client that was last served (a string)."""
        return self.status_fields.get('client')

    @lazy_property
    def conn(self):
        """The number of kilobytes transferred this connection (a float)."""
        return coerce_value(float, self.status_fields.get('conn', '0'))

    @lazy_property
    def cpu(self):
        """The CPU usage (number of seconds as a floating point number)."""
        return coerce_value(float, self.status_fields.get('cpu', '0'))

    @property
    def is_idle(self):
        """
        :data:`True` if the worker is idle, :data:`False` otherwise.

        The value of this property is based on :attr:`m` and
        :data:`IDLE_MODES`.
        """
        return self.m in IDLE_MODES

    @property
    def is_active(self):
        """
        :data:`True` if the worker isn't idle, :data:`False` otherwise.

        The value of this property is based on :attr:`is_idle`.
        """
        return not self.is_idle

    @lazy_property
    def m(self):
        """
        The mode of operation (a string).

        Here's an overview of known modes (not intended as an exhaustive list):

        =====  =================================
        Mode   Description
        =====  =================================
        ``_``  Waiting for connection
        ``S``  Starting up
        ``R``  Reading request
        ``W``  Sending reply
        ``K``  Keepalive (read)
        ``D``  DNS lookup
        ``C``  Closing connection
        ``L``  Logging
        ``G``  Gracefully finishing
        ``I``  Idle cleanup of worker
        ``.``  Open slot with no current process
        =====  =================================

        See also :attr:`is_active` and :attr:`is_idle`.
        """
        return self.status_fields.get('m')

    @lazy_property
    def pid(self):
        """The process ID of the Apache worker (an integer)."""
        return coerce_value(int, self.status_fields.get('pid'))

    @lazy_property
    def req(self):
        """The number of milliseconds required to process the most recent request (an integer)."""
        return coerce_value(int, self.status_fields.get('req'))

    @lazy_property
    def request(self):
        """
        The HTTP status line of the most recent request (a string or :data:`None`).

        The default value of the :attr:`request` field on Apache's status page
        is the string ``NULL``. This obscure implementation detail is hidden by
        the :attr:`request` property.
        """
        value = self.status_fields.get('request', 'NULL')
        return value if value != 'NULL' else None

    @lazy_property
    def slot(self):
        """The total number of megabytes transferred this slot (a float)."""
        return coerce_value(float, self.status_fields.get('slot', '0'))

    @lazy_property
    def srv(self):
        """Child Server number and generation (a tuple of two integers)."""
        raw_value = self.status_fields.get('srv', '0-0')
        return tuple(coerce_value(int, n) for n in raw_value.split('-'))

    @lazy_property
    def ss(self):
        """
        The number of seconds since the beginning of the most recent request.

        The value is an integer, or :data:`None` when the field on the status
        page can't be parsed as an integer.
        """
        return coerce_value(int, self.status_fields.get('ss', '0'))

    @lazy_property
    def vhost(self):
        """The server name and port of the virtual host that served the last request (a string)."""
        return self.status_fields.get('vhost')

    def __str__(self):
        """Render a human friendly representation of a native Apache worker."""
        return "native worker %i (%s)" % (self.pid, "active" if self.is_active else "idle")
def parse_status_table(table):
    """
    Parse one of the status tables from Apache's HTML status page.

    :param table: The BeautifulSoup tag of a ``<table>`` element.
    :returns: A generator of dictionaries, one for each table row that
              contains ``<td>`` cells. The keys are the normalized column
              headings (see :func:`normalize_text()`) and the values are
              the text contents of the cells (see :func:`coerce_tag()`).

    Rows that contain more cells than the table has headings can't be
    mapped to column names and are silently skipped (apart from a debug
    message).
    """
    headings = dict((i, normalize_text(coerce_tag(th))) for i, th in enumerate(table.findAll('th')))
    logger.debug("Parsed table headings: %r", headings)
    for tr in table.findAll('tr'):
        values_by_index = [coerce_tag(td) for td in tr.findAll('td')]
        logger.debug("Parsed values by index: %r", values_by_index)
        # Rows without cells (e.g. the row with the headings) are ignored.
        if not values_by_index:
            continue
        try:
            values_by_name = dict((headings[i], v) for i, v in enumerate(values_by_index))
        except KeyError:
            # The only expected failure: a cell without a matching heading.
            logger.debug("Skipping row with more cells than headings: %r", values_by_index)
            continue
        logger.debug("Parsed values by name: %r", values_by_name)
        # Yield outside of the try block so that exceptions raised by the
        # consumer of this generator aren't swallowed here.
        yield values_by_name
def coerce_tag(tag):
    """
    Coerce a BeautifulSoup tag to its string contents (stripped from leading and trailing whitespace).

    :param tag: The BeautifulSoup tag whose text should be extracted.
    :returns: The concatenated text nodes of the tag (a string) or an empty
              string if the text can't be extracted for whatever reason.

    Used by :func:`parse_status_table()` to get the text values of HTML tags.
    """
    try:
        return u''.join(tag.findAll(text=True)).strip()
    except Exception:
        # Deliberately best effort: any value that doesn't behave like a tag
        # is treated as an empty cell.
        return ''
def coerce_value(type, value):
    """
    Coerce a value to an expected type.

    :param type: The type to coerce the value to (any type or other callable
                 that accepts the value as its only argument).
    :param value: The value to coerce (any Python value).
    :returns: The coerced value or :data:`None` if an exception is raised
              during coercion.

    Used by :class:`WorkerStatus` to coerce metrics parsed from the Apache
    status page to their expected Python types.
    """
    try:
        return type(value)
    except Exception:
        # Deliberately best effort: values that can't be coerced are
        # reported as "not available".
        return None
def normalize_text(value):
    """
    Lossy normalization of text values to make string comparisons less fragile.

    :param value: The text to normalize (a string).
    :returns: The lowercased text with everything except the characters
              ``a-z`` and ``0-9`` removed, or an empty string if the value
              can't be normalized (e.g. because it's not a string).
    """
    try:
        return re.sub('[^a-z0-9]', '', value.lower())
    except Exception:
        # Deliberately best effort: non-text values normalize to nothing.
        return ''