"""
Restful json `CherryPy <http://cherrypy.org/>`_ server.
The server script mounts a `WebSearcher`_ (read_only) or `WebIndexer`_ root.
Standard `CherryPy configuration <http://www.cherrypy.org/wiki/ConfigAPI>`_ applies,
and the provided `custom tools <#tools>`_ are also configurable.
All request and response bodies are `application/json values <http://tools.ietf.org/html/rfc4627.html#section-2.1>`_.
WebSearcher exposes resources for an IndexSearcher.
In addition to search requests, it provides access to term and document information in the index.
Note Lucene doc ids are ephemeral; they should only be used across requests for the same index version.
* :meth:`/ <WebSearcher.index>`
* :meth:`/search <WebSearcher.search>`
* :meth:`/docs <WebSearcher.docs>`
* :meth:`/terms <WebSearcher.terms>`
* :meth:`/update <WebSearcher.update>`
WebIndexer extends WebSearcher, exposing additional resources and methods for an Indexer.
Single documents may be added, deleted, or replaced by a unique indexed field.
Multiple documents may also be added or deleted by query at once.
By default changes are not visible until the update resource is called to commit a new index version.
If a near real-time Indexer is used (an experimental feature in Lucene), then changes are instantly searchable.
In such cases a commit still hasn't occurred; the index based :meth:`validation headers <validate>` shouldn't be used for caching.
* :meth:`/ <WebIndexer.index>`
* :meth:`/search <WebIndexer.search>`
* :meth:`/docs <WebIndexer.docs>`
* :meth:`/fields <WebIndexer.fields>`
* :meth:`/update <WebIndexer.update>`
Custom servers should create and mount WebSearchers and WebIndexers as needed.
:meth:`Caches <WebSearcher.update>` and :meth:`field settings <WebIndexer.fields>` can then be applied directly before `starting <#start>`_ the server.
WebSearchers and WebIndexers can of course also be subclassed for custom interfaces.
CherryPy and Lucene VM integration issues:
* Monitors (such as autoreload) are not compatible with the VM unless threads are attached.
* WorkerThreads must also be attached to the VM.
* VM initialization must occur after daemonizing.
* Recommended that the VM ignores keyboard interrupts (-Xrs) for clean server shutdown.
"""
from future_builtins import filter, map
import re
import time
import httplib
import heapq
import collections
import itertools, operator
import os, optparse
from email.utils import formatdate
import contextlib
try:
import simplejson as json
except ImportError:
import json
import lucene
import cherrypy
try:
from . import engine
except ValueError:
import engine
def tool(hook):
    """Return a decorator which registers a function as a cherrypy tool.

    :param hook: name of the cherrypy hook point the tool is attached to
    """
    def register(func):
        # Trailing underscores are dropped from the tool name,
        # so e.g. ``json_`` is registered as ``cherrypy.tools.json``.
        name = func.__name__.rstrip('_')
        setattr(cherrypy.tools, name, cherrypy.Tool(hook, func))
        return func
    return register
@tool('before_handler')
def json_(indent=None, content_type='application/json', process_body=None):
    """Handle request bodies and responses in json format.

    The request body is decoded into ``request.json`` when the request's
    Content-Type matches, and the page handler is wrapped so that its return
    value is encoded as json unless the handler set its own content type.

    :param indent: indentation level for pretty printing
    :param content_type: request media type and response content-type header
    :param process_body: optional function to process body into request.params
    :raises cherrypy.HTTPError: 400 for an undecodable body, 415 for any other media type
    """
    def media(value):
        # Only the type/subtype is compared: parameters such as
        # "; charset=utf-8" are legal, and media types are case-insensitive.
        return value.partition(';')[0].strip().lower()

    request = cherrypy.serving.request
    header = request.headers.get('content-type')
    if header is not None:
        if media(header) != media(content_type):
            message = "Received Content-Type header {0}; only {1} is supported.".format(header, content_type)
            raise cherrypy.HTTPError(httplib.UNSUPPORTED_MEDIA_TYPE, message)
        with HTTPError(httplib.BAD_REQUEST, ValueError, AttributeError):
            request.json = json.load(request.body)
        if process_body is not None:
            with HTTPError(httplib.BAD_REQUEST, TypeError):
                request.params.update(process_body(request.json))
    headers = cherrypy.response.headers
    handler = request.handler

    def json_handler(*args, **kwargs):
        body = handler(*args, **kwargs)
        # Only encode when the handler left the content type as a text/ type.
        if headers['content-type'].startswith('text/'):
            headers['content-type'] = content_type
            body = json.dumps(body, indent=indent)
        return body
    request.handler = json_handler
@tool('on_start_resource')
def allow(methods=('GET', 'HEAD')):
    """Only allow specified methods.

    :param methods: sequence of permitted HTTP method names
    :raises cherrypy.HTTPError: 405, with an allow header, for any other method
    """
    request = cherrypy.serving.request
    # Requests whose handler is already an error are left untouched.
    if request.method in methods or isinstance(request.handler, cherrypy.HTTPError):
        return
    cherrypy.response.headers['allow'] = ', '.join(methods)
    message = "The path {0!r} does not allow {1}.".format(request.path_info, request.method)
    raise cherrypy.HTTPError(httplib.METHOD_NOT_ALLOWED, message)
@tool('before_finalize')
def time_():
    "Return response time in headers."
    response = cherrypy.serving.response
    # Seconds elapsed since the response's recorded time.
    elapsed = time.time() - response.time
    response.headers['x-response-time'] = elapsed
@tool('on_start_resource')
def validate(methods=('GET', 'HEAD'), etag=True, last_modified=True, max_age=None, expires=None):
    """Return and validate caching headers for GET requests.

    :param methods: only set headers for specified methods
    :param etag: return weak entity tag header based on index version and validate if-match headers
    :param last_modified: return last-modified header based on index timestamp and validate if-modified headers
    :param max_age: return cache-control max-age and age headers based on last update timestamp
    :param expires: return expires header offset from last update timestamp
    """
    request = cherrypy.serving.request
    # Skip other methods, and requests whose handler is already an error.
    if request.method not in methods or isinstance(request.handler, cherrypy.HTTPError):
        return
    headers = cherrypy.response.headers
    root = request.app.root
    if etag:
        headers['etag'] = 'W/"{0}"'.format(root.searcher.version)
        cherrypy.lib.cptools.validate_etags()
    if last_modified:
        headers['last-modified'] = formatdate(root.searcher.timestamp, usegmt=True)
        cherrypy.lib.cptools.validate_since()
    if max_age is not None:
        headers['age'] = int(time.time() - root.updated)
        headers['cache-control'] = 'max-age={0}'.format(max_age)
    if expires is not None:
        headers['expires'] = formatdate(expires + root.updated, usegmt=True)
@tool('before_handler')
def params(**types):
    """Convert specified request params.

    :param types: mapping of param name to a conversion function
    :raises cherrypy.HTTPError: 400 if a conversion raises ValueError
    """
    request_params = cherrypy.request.params
    # Only params actually present in the request are converted.
    present = set(types).intersection(request_params)
    with HTTPError(httplib.BAD_REQUEST, ValueError):
        for key in present:
            convert = types[key]
            request_params[key] = convert(request_params[key])
def multi(value):
    "Return comma separated value split into a list; falsy values are returned unchanged."
    if not value:
        return value
    return value.split(',')
class params:
    "Parameter parsing."

    @staticmethod
    def q(searcher, q, **options):
        """Return a query built from q and the ``q.``-prefixed options.

        With a ``q.type`` option the named engine.Query constructor is called directly.
        Otherwise the remaining options are decoded as json and the query is parsed
        by the searcher; nothing is returned if q is None.
        """
        prefixed = [key for key in options if key.startswith('q.')]
        options = dict((key.partition('.')[-1], options[key]) for key in prefixed)
        field = options.pop('field', [])
        single = isinstance(field, basestring)
        # Split each "name^boost" into a (name, boost) pair; boost is '' when absent.
        pairs = [name.partition('^')[::2] for name in ([field] if single else field)]
        if any(boost for name, boost in pairs):
            field = dict((name, float(boost or 1.0)) for name, boost in pairs)
        elif single:
            (field, boost), = pairs
        else:
            field = [name for name, boost in pairs] or ''
        if 'type' in options:
            with HTTPError(httplib.BAD_REQUEST, AttributeError):
                return getattr(engine.Query, options['type'])(field, q)
        # 'op' and 'version' are passed through as strings; the rest are json values.
        for key in set(options) - set(['op', 'version']):
            with HTTPError(httplib.BAD_REQUEST, ValueError):
                options[key] = json.loads(options[key])
        if q is None:
            return None
        with HTTPError(httplib.BAD_REQUEST, lucene.JavaError):
            return searcher.parse(q, field=field, **options)

    @staticmethod
    def fields(searcher, fields=None, **options):
        """Return selected fields, multi-valued field names, and indexed field comparators.

        :return: (mapping of field names or None, multi-valued names, {name: comparator})
        """
        selected = None if fields is None else dict.fromkeys(fields)
        multi = options.get('fields.multi', ())
        indexed = {}
        for spec in options.get('fields.indexed', ()):
            # Each spec is "name" or "name:type".
            item = spec.split(':')
            indexed[item[0]] = searcher.comparator(*item)
        return selected, multi, indexed
def json_error(version, **body):
    """Transform errors into json format.

    The content type and indentation are taken from the request's json tool
    settings when present.
    """
    settings = cherrypy.request.toolmaps['tools'].get('json', {})
    content_type = settings.get('content_type', 'application/json')
    cherrypy.response.headers['content-type'] = content_type
    return json.dumps(body, indent=settings.get('indent'))
def attach_thread(id=None):
    """Attach current cherrypy worker thread to lucene VM.

    :param id: unused; accepted so the function can be subscribed to 'start_thread'
    """
    env = lucene.getVMEnv()
    env.attachCurrentThread()
class Autoreloader(cherrypy.process.plugins.Autoreloader):
    "Autoreload monitor compatible with lucene VM."

    def run(self):
        "Attach the calling thread to the VM, then run the inherited check."
        attach_thread()
        super(Autoreloader, self).run()
class AttachedMonitor(cherrypy.process.plugins.Monitor):
    "Periodically run a callback function in an attached thread."

    def __init__(self, bus, callback, frequency=cherrypy.process.plugins.Monitor.frequency):
        def attached():
            # Attach before every invocation of the wrapped callback.
            attach_thread()
            callback()
        super(AttachedMonitor, self).__init__(bus, attached, frequency)
@contextlib.contextmanager
def HTTPError(status, *exceptions):
    """Interpret exceptions as an HTTPError with given status code.

    :param status: http status code of the raised cherrypy.HTTPError
    :param exceptions: exception types to translate; any others propagate unchanged
    """
    try:
        yield
    except exceptions as error:
        message = str(error)
        raise cherrypy.HTTPError(status, message)
class WebSearcher(object):
    "Dispatch root with a delegated Searcher."
    # Tools enabled for every resource, plus json formatted error pages.
    _cp_config = dict.fromkeys(map('tools.{0}.on'.format, ['gzip', 'accept', 'json', 'allow', 'time', 'validate']), True)
    _cp_config.update({
        'error_page.default': json_error,
        'tools.gzip.mime_types': ['text/html', 'text/plain', 'application/json'],
        'tools.accept.media': 'application/json',
    })

    def __init__(self, *directories, **kwargs):
        # Several directories are searched together through a MultiSearcher.
        if len(directories) > 1:
            self.searcher = engine.MultiSearcher(directories, **kwargs)
        else:
            self.searcher = engine.IndexSearcher(*directories, **kwargs)
        self.updated = time.time()

    @classmethod
    def new(cls, *args, **kwargs):
        "Return new uninitialized root which can be mounted on dispatch tree before VM initialization."
        self = object.__new__(cls)
        # The constructor arguments are stored for a later ``__init__`` call.
        self.args, self.kwargs = args, kwargs
        return self

    def close(self):
        "Close the delegated searcher."
        self.searcher.close()

    @cherrypy.expose
    @cherrypy.tools.json(process_body=lambda body: dict.fromkeys(body, True))
    @cherrypy.tools.allow(methods=['POST'])
    def update(self, **caches):
        """Refresh index version.

        **POST** /update
            Reopen searcher, optionally reloading caches, and return document count.

            ["filters"|"sorters"|"spellcheckers",... ]

            :return: *int*
        """
        self.searcher = self.searcher.reopen(**caches)
        self.updated = time.time()
        return len(self.searcher)

    @cherrypy.expose
    def index(self):
        """Return index information.

        **GET** /
            Return a mapping of the directory to the document count.

            :return: {*string*: *int*,... }
        """
        reader = self.searcher.indexReader
        readers = reader.sequentialSubReaders if lucene.MultiReader.instance_(reader) else [reader]
        return dict((unicode(reader.directory()), reader.numDocs()) for reader in readers)

    @cherrypy.expose
    @cherrypy.tools.params(fields=multi, **{'fields.multi': multi, 'fields.indexed': multi})
    def docs(self, *path, **options):
        r"""Return ids or documents.

        **GET** /docs
            Return array of doc ids.

            :return: [*int*,... ]

        **GET** /docs/[*int*\|\ *chars*/*chars*]?
            Return document mapping from id or unique name and value.
            Optionally select stored, multi-valued, and cached indexed fields.

            &fields=\ *chars*,... &fields.multi=\ *chars*,... &fields.indexed=\ *chars*\ [:*chars*],...

            :return: {*string*: *string*\|\ *number*\|\ *array*,... }
        """
        searcher = self.searcher
        if not path:
            return list(searcher)
        # A single path segment is a doc id; two segments are a unique name and value.
        # Unpacking also fails (ValueError) unless exactly one document matches.
        with HTTPError(httplib.NOT_FOUND, ValueError):
            id, = map(int, path) if len(path) == 1 else searcher.docs(*path)
        fields, multi, indexed = params.fields(searcher, **options)
        with HTTPError(httplib.NOT_FOUND, lucene.JavaError):
            doc = searcher[id] if fields is None else searcher.get(id, *itertools.chain(fields, multi))
        result = doc.dict(*multi, **(fields or {}))
        result.update((name, indexed[name][id]) for name in indexed)
        return result

    @cherrypy.expose
    @cherrypy.tools.params(count=int, start=int, fields=multi, sort=multi, facets=multi, hl=multi, mlt=int, spellcheck=int, timeout=float,
        **{'fields.multi': multi, 'fields.indexed': multi, 'facets.count': int, 'facets.min': int, 'group.count': int, 'group.limit': int, 'hl.count': int, 'mlt.fields': multi})
    def search(self, q=None, count=None, start=0, fields=None, sort=None, facets='', group='', hl='', mlt=None, spellcheck=0, timeout=None, **options):
        r"""Run query and return documents.

        **GET** /search?
            Return array of document objects and total doc count.

            &q=\ *chars*\ &q.type=[term|prefix|wildcard]&q.\ *chars*\ =...,
                query, optional type to skip parsing, and optional parser settings: q.field, q.op,...

            &filter=\ *chars*
                | cached filter applied to the query
                | if a previously cached filter is not found, the value will be parsed as a query

            &count=\ *int*\ &start=0
                maximum number of docs to return and offset to start at

            &fields=\ *chars*,... &fields.multi=\ *chars*,... &fields.indexed=\ *chars*\ [:*chars*],...
                only include selected stored fields; multi-valued fields returned in an array; indexed fields with optional type are cached

            &sort=\ [-]\ *chars*\ [:*chars*],... &sort.scores[=max]
                | field name, optional type, minus sign indicates descending
                | optionally score docs, additionally compute maximum score

            &facets=\ *chars*,... &facets.count=\ *int*\&facets.min=0
                | include facet counts for given field names; facets filters are cached
                | optional maximum number of most populated facet values per field, and minimum count to return

            &group=\ *chars*\ [:*chars*]&group.count=1&group.limit=\ *int*
                | group documents by field value with optional type, up to given maximum count
                | limit number of groups which return docs

            &hl=\ *chars*,... &hl.count=1&hl.tag=strong&hl.enable=[fields|terms]
                | stored fields to return highlighted
                | optional maximum fragment count and html tag name
                | optionally enable matching any field or any term

            &mlt=\ *int*\ &mlt.fields=\ *chars*,... &mlt.\ *chars*\ =...,
                | doc index (or id without a query) to find MoreLikeThis
                | optional document fields to match
                | optional MoreLikeThis settings: mlt.minTermFreq, mlt.minDocFreq,...

            &spellcheck=\ *int*
                | maximum number of spelling corrections to return for each query term, grouped by field
                | original query is still run; use q.spellcheck=true to affect query parsing

            &timeout=\ *number*
                timeout search after elapsed number of seconds

            :return:
                | {
                | "query": *string*,
                | "count": *int*\|null,
                | "maxscore": *number*\|null,
                | "docs": [{"__id__": *int*, "__score__": *number*, "__highlights__": {*string*: *array*,... }, *string*: *object*,... },... ],
                | "facets": {*string*: {*string*: *int*,... },... },
                | "groups": [{"count": *int*, "value": *value*, "docs": [*object*,... ]},... ]
                | "spellcheck": {*string*: {*string*: [*string*,... ],... },... },
                | }
        """
        searcher = self.searcher
        reverse = False
        if sort is not None:
            # re.match returns None for a malformed sort field, so ``.groups()`` raises
            # AttributeError; report that as a bad request rather than a server error.
            with HTTPError(httplib.BAD_REQUEST, AttributeError):
                sort = [re.match(r'(-?)(\w+):?(\w*)', field).groups() for field in sort]
            # NOTE(review): under Python 2 these list comprehensions rebind the local
            # ``reverse``, which is later passed to searcher.search; kept as is.
            sort = [(name, (type or 'string'), (reverse == '-')) for reverse, name, type in sort]
            if count is None:
                with HTTPError(httplib.BAD_REQUEST, ValueError, AttributeError):
                    reverse, = set(reverse for name, type, reverse in sort)  # only one sort direction allowed with unlimited count
                    comparators = [searcher.comparator(name, type) for name, type, reverse in sort]
                sort = comparators[0].__getitem__ if len(comparators) == 1 else lambda id: tuple(map(operator.itemgetter(id), comparators))
            else:
                with HTTPError(httplib.BAD_REQUEST, AttributeError):
                    sort = [searcher.sorter(name, type, reverse=reverse) for name, type, reverse in sort]
        q = params.q(searcher, q, **options)
        qfilter = options.pop('filter', None)
        # An unknown filter is parsed as a query and cached under its own text.
        if qfilter is not None and qfilter not in searcher.filters:
            searcher.filters[qfilter] = engine.Query.__dict__['filter'](params.q(searcher, qfilter, **options))
        qfilter = searcher.filters.get(qfilter)
        if mlt is not None:
            if q is not None:
                # With a query, mlt is an index into the query's hits rather than a doc id.
                mlt = searcher.search(q, count=mlt+1, sort=sort, reverse=reverse).ids[mlt]
            mltfields = options.pop('mlt.fields', ())
            with HTTPError(httplib.BAD_REQUEST, ValueError):
                attrs = dict((key.partition('.')[-1], json.loads(options[key])) for key in options if key.startswith('mlt.'))
            q = searcher.morelikethis(mlt, *mltfields, **attrs)
        if count is not None:
            count += start
        if count == 0:
            # Search for a single hit and slice it away below, leaving no docs.
            start = count = 1
        scores = options.get('sort.scores')
        scores = {'scores': scores is not None, 'maxscore': scores == 'max'}
        hits = searcher.search(q, filter=qfilter, count=count, sort=sort, reverse=reverse, timeout=timeout, **scores)[start:]
        result = {'query': q and unicode(q), 'count': hits.count, 'maxscore': hits.maxscore}
        tag, enable = options.get('hl.tag', 'strong'), options.get('hl.enable', '')
        hlcount = options.get('hl.count', 1)
        if hl:
            hl = dict((name, searcher.highlighter(q, name, terms='terms' in enable, fields='fields' in enable, tag=tag)) for name in hl)
        fields, multi, indexed = params.fields(searcher, fields, **options)
        if fields is None:
            fields = {}
        else:
            hits.fields = lucene.MapFieldSelector(list(itertools.chain(fields, multi)))
        with HTTPError(httplib.BAD_REQUEST, AttributeError):
            groups = hits.groupby(searcher.comparator(*group.split(':')).__getitem__) if group else [hits]
        result['groups'], limit = [], options.get('group.limit', len(groups))
        for hits in groups[:limit]:
            docs = []
            for hit in hits[:options.get('group.count', 1) if group else None]:
                doc = hit.dict(*multi, **fields)
                doc.update((name, indexed[name][hit.id]) for name in indexed)
                fragments = (hl[name].fragments(hit.id, hlcount) for name in hl)
                if hl:
                    doc['__highlights__'] = dict((name, value) for name, value in zip(hl, fragments) if value is not None)
                docs.append(doc)
            result['groups'].append({'docs': docs, 'count': len(hits), 'value': getattr(hits, 'value', None)})
        # Groups beyond the limit only report their count and value.
        for hits in groups[limit:]:
            result['groups'].append({'docs': [], 'count': len(hits), 'value': hits.value})
        if not group:
            result['docs'] = result.pop('groups')[0]['docs']
        q = q or lucene.MatchAllDocsQuery()
        if facets:
            facets = (tuple(facet.split(':')) if ':' in facet else facet for facet in facets)
            facets = result['facets'] = searcher.facets(q, *facets)
            if 'facets.min' in options:
                for name, counts in facets.items():
                    facets[name] = dict((term, count) for term, count in counts.items() if count >= options['facets.min'])
            if 'facets.count' in options:
                for name, counts in facets.items():
                    facets[name] = dict((term, counts[term]) for term in heapq.nlargest(options['facets.count'], counts, key=counts.__getitem__))
        if spellcheck:
            terms = result['spellcheck'] = collections.defaultdict(dict)
            for name, value in engine.Query.__dict__['terms'](q):
                terms[name][value] = list(itertools.islice(searcher.correct(name, value), spellcheck))
        return result

    @cherrypy.expose
    @cherrypy.tools.params(count=int, step=int)
    def terms(self, name='', value=':', *path, **options):
        r"""Return data about indexed terms.

        **GET** /terms?
            Return field names, with optional selection.

            &option=\ *chars*

            :return: [*string*,... ]

        **GET** /terms/*chars*\[:int|float\]?step=0
            Return term values for given field name, with optional type and step for numeric encoded values.

            :return: [*string*,... ]

        **GET** /terms/*chars*/*chars*\[\*\|?\|:*chars*\|~\ *number*\]
            Return term values (wildcards, slices, or fuzzy terms) for given field name.

            :return: [*string*,... ]

        **GET** /terms/*chars*/*chars*\[\*\|~\]?count=\ *int*
            Return spellchecked term values ordered by decreasing document frequency.
            Prefixes (*) are optimized to be suitable for real-time query suggestions; all terms are cached.

            :return: [*string*,... ]

        **GET** /terms/*chars*/*chars*
            Return document count for given term.

            :return: *int*

        **GET** /terms/*chars*/*chars*/docs
            Return document ids for given term.

            :return: [*int*,... ]

        **GET** /terms/*chars*/*chars*/docs/counts
            Return document ids and frequency counts for given term.

            :return: [[*int*, *int*],... ]

        **GET** /terms/*chars*/*chars*/docs/positions
            Return document ids and positions for given term.

            :return: [[*int*, [*int*,... ]],... ]
        """
        searcher = self.searcher
        if not name:
            return sorted(searcher.names(**options))
        if ':' in name:
            # Resolve the numeric type from an explicit table instead of ``__builtins__``:
            # that name is a dict (not a module) once this module is imported, and it would
            # otherwise let the url select any builtin.
            numeric = {'int': int, 'float': float}
            with HTTPError(httplib.BAD_REQUEST, ValueError, KeyError):
                name, type = name.split(':')
                type = numeric[type]
            return list(searcher.numbers(name, step=options.get('step', 0), type=type))
        if ':' in value:
            with HTTPError(httplib.BAD_REQUEST, ValueError):
                start, stop = value.split(':')
            return list(searcher.terms(name, start, stop or None))
        if 'count' in options:
            if value.endswith('*'):
                return searcher.suggest(name, value.rstrip('*'), options['count'])
            if value.endswith('~'):
                return list(itertools.islice(searcher.correct(name, value.rstrip('~')), options['count']))
        if '*' in value or '?' in value:
            return list(searcher.terms(name, value))
        if '~' in value:
            with HTTPError(httplib.BAD_REQUEST, ValueError):
                value, similarity = value.split('~')
                similarity = float(similarity or 0.5)
            return list(searcher.terms(name, value, minSimilarity=similarity))
        if not path:
            return searcher.count(name, value)
        if path[0] == 'docs':
            if path[1:] == ():
                return list(searcher.docs(name, value))
            if path[1:] == ('counts',):
                return list(searcher.docs(name, value, counts=True))
            if path[1:] == ('positions',):
                return list(searcher.positions(name, value))
        raise cherrypy.NotFound()
class WebIndexer(WebSearcher):
    "Dispatch root with a delegated Indexer, exposing write methods."

    def __init__(self, *args, **kwargs):
        # The base initializer is not called: ``searcher`` is a property of the indexer here.
        self.indexer = engine.Indexer(*args, **kwargs)
        self.updated = time.time()

    @property
    def searcher(self):
        "The indexer's current searcher."
        return self.indexer.indexSearcher

    def close(self):
        "Close the indexer and then the searcher."
        self.indexer.close()
        WebSearcher.close(self)

    def refresh(self):
        "Refresh a near real-time indexer; otherwise mark the response as accepted but uncommitted."
        if self.indexer.nrt:
            self.indexer.refresh()
            self.updated = time.time()
        else:
            cherrypy.response.status = httplib.ACCEPTED

    @cherrypy.expose
    @cherrypy.tools.json(process_body=lambda body: {'directories': list(body)})
    @cherrypy.tools.allow(methods=['GET', 'HEAD', 'POST'])
    def index(self, directories=()):
        """Add indexes.  See :meth:`WebSearcher.index` for GET method.

        **POST** /
            Add indexes without optimization.

            [*string*,... ]
        """
        if cherrypy.request.method == 'POST':
            for directory in directories:
                self.indexer += directory
            self.refresh()
        return {unicode(self.indexer.directory): len(self.indexer)}

    @cherrypy.expose
    @cherrypy.tools.json(process_body=lambda body: dict.fromkeys(body, True))
    @cherrypy.tools.allow(methods=['POST', 'PUT', 'DELETE'])
    def update(self, *path, **options):
        """Commit index changes and refresh index version.

        **POST** /update
            Commit write operations and return document count.  See :meth:`WebSearcher.update` for caching options.

            ["expunge"|"optimize",... ]

            :return: *int*

        **PUT, DELETE** /update/snapshot
            Snapshot current index commit and return array of referenced filenames, or release previous snapshot.

            :return: [*string*,... ]
        """
        allow(('PUT', 'DELETE') if path else ('GET', 'POST'))  # allow direct method call
        if not path:
            self.indexer.commit(**options)
            self.updated = time.time()
            return len(self.indexer)
        # The path segment is only passed on when lucene provides IndexWriterConfig
        # (the bool slices off zero or one segment).
        path = path[:hasattr(lucene, 'IndexWriterConfig')]
        with HTTPError(httplib.CONFLICT, lucene.JavaError):
            if cherrypy.request.method == 'PUT':
                cherrypy.response.status = httplib.CREATED
                return list(self.indexer.policy.snapshot(*path).fileNames)
            self.indexer.policy.release(*path)

    @cherrypy.expose
    @cherrypy.tools.allow(methods=['GET', 'HEAD', 'POST', 'PUT', 'DELETE'])
    def docs(self, *path, **options):
        r"""Add or return documents.  See :meth:`WebSearcher.docs` for GET method.

        **POST** /docs
            Add documents to index.

            [{*string*: *string*\|\ *number*\|\ *array*,... },... ]

        **PUT, DELETE** /docs/*chars*/*chars*
            Set or delete document.  Unique term should be indexed and is added to the new document.

            {*string*: *string*\|\ *number*\|\ *array*,... }
        """
        # Allowed methods depend on the path length; longer paths are not found.
        with HTTPError(httplib.NOT_FOUND, IndexError):
            allow([('GET', 'HEAD', 'POST'), ('GET', 'HEAD'), ('GET', 'HEAD', 'PUT', 'DELETE')][len(path)])
        request = cherrypy.serving.request
        if request.method in ('GET', 'HEAD'):
            return WebSearcher.docs(self, *path, **options)
        if request.method == 'DELETE':
            self.indexer.delete(*path)
        elif request.method == 'PUT':
            name, value = path
            doc = getattr(request, 'json', {})
            # Explicit checks instead of ``assert``: assertions are removed under ``python -O``,
            # which would skip validation and the ``setdefault`` that adds the unique term.
            with HTTPError(httplib.CONFLICT, KeyError):
                indexed = self.indexer.fields[name].index.indexed
            if not indexed:
                raise cherrypy.HTTPError(httplib.CONFLICT, 'unique field must be indexed')
            if doc.setdefault(name, value) != value:
                raise cherrypy.HTTPError(httplib.BAD_REQUEST, 'multiple values for unique field')
            self.indexer.update(name, value, doc)
        else:
            for doc in getattr(request, 'json', ()):
                self.indexer.add(doc)
        self.refresh()
    docs._cp_config.update(WebSearcher.docs._cp_config)

    @cherrypy.expose
    @cherrypy.tools.allow(methods=['GET', 'HEAD', 'DELETE'])
    def search(self, q=None, **options):
        r"""Run or delete a query.  See :meth:`WebSearcher.search` for GET method.

        **DELETE** /search?q=\ *chars*
            Delete documents which match query.
        """
        if cherrypy.request.method != 'DELETE':
            return WebSearcher.search(self, q, **options)
        if q is None:
            self.indexer.deleteAll()
        else:
            self.indexer.delete(params.q(self.searcher, q, **options))
        self.refresh()
    search._cp_config.update(WebSearcher.search._cp_config)

    @cherrypy.expose
    @cherrypy.tools.json(process_body=dict)
    @cherrypy.tools.allow(methods=['GET', 'HEAD', 'PUT'])
    @cherrypy.tools.validate(on=False)
    def fields(self, name='', **settings):
        r"""Return or store a field's parameters.

        **GET** /fields
            Return known field names.

            :return: [*string*,... ]

        **GET, PUT** /fields/*chars*
            Set and return parameters for given field name.

            {"store"|"index"|"termvector": *string*\|true|false,... }

            :return: {"store": *string*, "index": *string*, "termvector": *string*}
        """
        if not name:
            allow()
            return sorted(self.indexer.fields)
        if cherrypy.request.method == 'PUT':
            if name not in self.indexer.fields:
                cherrypy.response.status = httplib.CREATED
            with HTTPError(httplib.BAD_REQUEST, AttributeError):
                self.indexer.set(name, **settings)
        with HTTPError(httplib.NOT_FOUND, KeyError):
            field = self.indexer.fields[name]
        return dict((name, str(getattr(field, name))) for name in ['store', 'index', 'termvector'])
def init(vmargs='-Xrs', **kwargs):
    """Callback to initialize VM and app roots after daemonizing.

    :param vmargs,kwargs: passed to lucene.initVM
    """
    lucene.initVM(vmargs=vmargs, **kwargs)
    for app in cherrypy.tree.apps.values():
        root = app.root
        if not isinstance(root, WebSearcher):
            continue
        # Run the constructor with the arguments stored on the root as 'args' and 'kwargs'.
        state = root.__dict__
        deferred_args = state.pop('args')
        deferred_kwargs = state.pop('kwargs')
        root.__init__(*deferred_args, **deferred_kwargs)
def mount(root, path='', config=None, autoupdate=0):
    """Attach root and subscribe to plugins.

    :param root,path,config: see cherrypy.tree.mount
    :param autoupdate: see command-line options
    """
    bus = cherrypy.engine
    if hasattr(root, 'close'):
        # Close the root when the engine stops.
        bus.subscribe('stop', root.close)
    if autoupdate:
        monitor = AttachedMonitor(bus, root.update, autoupdate)
        monitor.subscribe()
    return cherrypy.tree.mount(root, path, config)
def start(root=None, path='', config=None, pidfile='', daemonize=False, autoreload=0, autoupdate=0, callback=None):
    """Attach root, subscribe to plugins, and start server.

    :param root,path,config: see cherrypy.quickstart
    :param pidfile,daemonize,autoreload,autoupdate: see command-line options
    :param callback: optional callback function scheduled after daemonizing
    """
    bus = cherrypy.engine
    plugins = cherrypy.process.plugins
    bus.subscribe('start_thread', attach_thread)
    # The builtin autoreloader is disabled; the attached Autoreloader is used on request.
    cherrypy.config['engine.autoreload.on'] = False
    if pidfile:
        plugins.PIDFile(bus, os.path.abspath(pidfile)).subscribe()
    if daemonize:
        cherrypy.config['log.screen'] = False
        plugins.Daemonizer(bus).subscribe()
    if autoreload:
        Autoreloader(bus, autoreload).subscribe()
    if callback:
        # Schedule the callback between daemonizing and the server start.
        daemon_priority = plugins.Daemonizer.start.priority
        server_priority = cherrypy.server.start.priority
        priority = (daemon_priority + server_priority) // 2
        bus.subscribe('start', callback, priority)
    if root is not None:
        mount(root, path, config, autoupdate)
    cherrypy.quickstart(cherrypy.tree.apps.get(path), path, config)
# Command-line interface; option names match the keyword arguments of ``start``,
# except for read_only which selects the root class.
parser = optparse.OptionParser(usage='python -m lupyne.server [index_directory ...]')
parser.add_option(
    '-r', '--read-only', action='store_true',
    help='expose only read methods; no write lock')
parser.add_option(
    '-c', '--config',
    help='optional configuration file or json object of global params')
parser.add_option(
    '-p', '--pidfile', metavar='FILE',
    help='store the process id in the given file')
parser.add_option(
    '-d', '--daemonize', action='store_true',
    help='run the server as a daemon')
parser.add_option(
    '--autoreload', type=int, metavar='SECONDS',
    help='automatically reload modules; replacement for engine.autoreload')
parser.add_option(
    '--autoupdate', type=int, metavar='SECONDS',
    help='automatically update index version')

if __name__ == '__main__':
    options, args = parser.parse_args()
    settings = options.__dict__
    read_only = settings.pop('read_only')
    # A config value which is not an existing file is interpreted as a json object.
    if options.config and not os.path.exists(options.config):
        options.config = {'global': json.loads(options.config)}
    # Several directories can only be searched, not indexed.
    if read_only or len(args) > 1:
        cls = WebSearcher
    else:
        cls = WebIndexer
    directories = map(os.path.abspath, args)
    root = cls.new(*directories)
    start(root, callback=init, **settings)