import cgi
import re
import unicodedata

# ---------------------------------------------------------------------------
# cgi.FieldStorage replacement, because FieldStorage is crap
# might be useful to provide access to cgi.maxlen
#  = maximum number of bytes to allow in a file upload
# this is a module-level global though, which is annoying.

# Type(s) used to tell decoded text fields apart from uploaded files.
# ``basestring`` only exists on Python 2; fall back to ``str`` elsewhere.
try:
    _STRING_TYPES = basestring
except NameError:
    _STRING_TYPES = str

# Characters that are never allowed to survive in an uploaded filename; each
# occurrence is replaced with an underscore.
_FILENAME_GARBAGE_RE = re.compile(r'[/\\:|~%*?]')
# Runs of dots/tildes at either end of the filename are dropped entirely.
_FILENAME_EDGE_RE = re.compile(r'^[.~]+|[.~]+$')

# Instance attributes that must never be resolved as form fields.
_INTERNAL_ATTRS = frozenset(('_fs', '_cache'))


class CGIForm(object):
    """FieldStorage wrapper with a saner API.

    Methods available:

    'field' in form
        Check for existence of a field.
    form.field
        Get a form field as a string, u'' if not defined.  This ALWAYS
        returns a unicode string.  utf-8 is used by default; set the
        'encoding' attribute to change this.  Real attributes and methods of
        this class ('list', 'text', 'int', 'file', 'encoding') take
        precedence over fields of the same name; use form.text('list') etc.
        for those.
    form['field']
        Get a form field as a list, [] if not defined.
    form.list('field')
        Same as form['field'].
    form.text('field')
        Same as form.field.
    form.int('field', default=0)
        Get the value as a number.  Returns 'default' (which defaults to
        None) if the field is missing or non-numeric.
    form.file('upload')
        Return the upload's FieldStorage object (with 'filename' and
        'length' set, and the file contents reachable through '.file').
        Returns None if the field is missing or is not a file.

    Note: form.upload / form.text('upload') on a file field returns the
    sanitized *filename*, not the file's contents.

    list() and form[...] raise ValueError the first time they touch an
    uploaded file whose filename is empty after sanitizing.  text(), int()
    and file() swallow that error and return their "missing" value instead.
    """

    encoding = 'utf-8'

    def __init__(self):
        """Parse the current CGI request, keeping blank values."""
        self._fs = cgi.FieldStorage(keep_blank_values=True)
        # field name -> list of fixed-up values, filled lazily by list()
        self._cache = {}

    def __contains__(self, field):
        """Check for the existence of a given field in the form.

        This is subtly different from checking its *value*.
        """
        return field in self._fs

    def _decode(self, s):
        """Return *s* as NFKC-normalized text with illegal characters removed.

        Byte strings are decoded with ``self.encoding`` (undecodable bytes
        are dropped); text that is already decoded is passed through as is.
        """
        if isinstance(s, bytes):
            s = s.decode(self.encoding, 'ignore')
        # NOTE(review): strip_illegal_unicode is not defined in the visible
        # part of this file; it is expected to be provided elsewhere.
        return unicodedata.normalize('NFKC', strip_illegal_unicode(s))

    @staticmethod
    def _clean_filename(fn):
        """Reduce a client-supplied filename to a safe bare name ('' if none)."""
        # hackarounds for stupid browsers (notably msie, but others might do
        # this as well) that send a full path instead of just the name
        if '/' in fn:
            fn = fn.split('/')[-1]
        elif '\\' in fn:
            fn = fn.split('\\')[-1]
        # throw out garbage characters (hax?)
        fn = _FILENAME_GARBAGE_RE.sub('_', fn)
        return _FILENAME_EDGE_RE.sub('', fn)

    def _fixup(self, fs):
        """Make a much more useful representation of a field.

        input:  a cgi.FieldStorage for a single field
        output: depends!
          - if it's a file, the same FieldStorage with a sanitized
            'filename' and a 'length' attribute (size in bytes) added;
          - otherwise the field's value, decoded with the proper encoding.

        Raises ValueError if an uploaded file's name is empty once sanitized.
        """
        if not getattr(fs, 'filename', False):
            # this is just a normal string field; decode it
            return self._decode(fs.value)

        # this is a file... we need to do some leg work
        fn = self._clean_filename(self._decode(fs.filename))
        if not fn:
            raise ValueError('Invalid filename')
        fs.filename = fn

        # get the file size. (the cgi parser already knew this at some
        # point, so why didn't it save it?)
        fs.file.seek(0, 2)  # EOF
        fs.length = fs.file.tell()
        fs.file.seek(0)  # BOF
        return fs

    def list(self, field):
        """Retrieve all values for a given field name as a list.

        String values are returned as unicode; files are returned as modified
        FieldStorage objects with .filename and .length properly defined.
        A missing field yields [].  Results are cached per field name.

        From the caller's side: if an item is not a string, it is a file --
        it has .filename, and the usual file functions (read, seek, etc.)
        live on item.file.
        """
        if field in self._cache:
            return self._cache[field]

        # _fs[field] will either be a FieldStorage of some sort, or a list
        # thereof.  KeyError means the field is absent; FieldStorage raises
        # TypeError when the request body was not a form at all.
        try:
            raw = self._fs[field]
        except (KeyError, TypeError):
            raw = []

        # FieldStorage doesn't always return lists, so let's fix that.
        if isinstance(raw, list):
            values = [self._fixup(item) for item in raw]
        else:
            values = [self._fixup(raw)]

        self._cache[field] = values
        return values

    def text(self, field):
        """Get the text value of a field.

        If the field is an uploaded file, the filename is returned.  A blank
        unicode string is returned for nonexistent fields; use __contains__
        to see if a given field actually exists or not.
        """
        # Best effort on purpose: any failure while fetching the field
        # (missing, invalid upload, ...) reads as "no value".
        try:
            data = self[field][0]
        except Exception:
            return u''
        if not isinstance(data, _STRING_TYPES):
            return data.filename
        return data

    def int(self, field, default=None):
        """Get a field's value as an integer.

        If the field does not exist or cannot be coerced to an integer, this
        returns 'default'.
        """
        try:
            return int(self[field][0])
        except Exception:
            return default

    def file(self, field):
        """Get a file field.

        If the field does not exist or is not a file, this returns None.
        """
        try:
            data = self[field][0]
        except Exception:
            return None
        if isinstance(data, _STRING_TYPES):
            return None
        return data

    # Remember that square brackets == list, and this will make sense.
    __getitem__ = list

    def __getattr__(self, field):
        """Resolve unknown attributes as form fields (same as text()).

        Only called when normal attribute lookup fails.  Internal attributes
        and special (dunder) names are never treated as fields: answering
        them with u'' breaks protocol probes such as ``__deepcopy__`` and
        makes a partially constructed instance recurse without bound.
        """
        if field in _INTERNAL_ATTRS or (
                field.startswith('__') and field.endswith('__')):
            raise AttributeError(field)
        return self.text(field)