640 lines
22 KiB

import cgi
import os
import sys
import tempfile
import unittest
from collections import namedtuple
from io import StringIO, BytesIO
from test import support
from test.support import warnings_helper
class HackedSysModule:
# The regression test will have real values in sys.argv, which
# will completely confuse the test of the cgi module
argv = []
stdin = sys.stdin
cgi.sys = HackedSysModule()
class ComparableException:
def __init__(self, err):
self.err = err
def __str__(self):
return str(self.err)
def __eq__(self, anExc):
if not isinstance(anExc, Exception):
return NotImplemented
return (self.err.__class__ == anExc.__class__ and
self.err.args == anExc.args)
def __getattr__(self, attr):
return getattr(self.err, attr)
def do_test(buf, method):
env = {}
if method == "GET":
fp = None
env['REQUEST_METHOD'] = 'GET'
env['QUERY_STRING'] = buf
elif method == "POST":
fp = BytesIO(buf.encode('latin-1')) # FieldStorage expects bytes
env['REQUEST_METHOD'] = 'POST'
env['CONTENT_TYPE'] = 'application/x-www-form-urlencoded'
env['CONTENT_LENGTH'] = str(len(buf))
else:
raise ValueError("unknown method: %s" % method)
try:
return cgi.parse(fp, env, strict_parsing=1)
except Exception as err:
return ComparableException(err)
parse_strict_test_cases = [
("", ValueError("bad query field: ''")),
("&", ValueError("bad query field: ''")),
("&&", ValueError("bad query field: ''")),
# Should the next few really be valid?
("=", {}),
("=&=", {}),
# This rest seem to make sense
("=a", {'': ['a']}),
("&=a", ValueError("bad query field: ''")),
("=a&", ValueError("bad query field: ''")),
("=&a", ValueError("bad query field: 'a'")),
("b=a", {'b': ['a']}),
("b+=a", {'b ': ['a']}),
("a=b=a", {'a': ['b=a']}),
("a=+b=a", {'a': [' b=a']}),
("&b=a", ValueError("bad query field: ''")),
("b&=a", ValueError("bad query field: 'b'")),
("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}),
("a=a+b&a=b+a", {'a': ['a b', 'b a']}),
("x=1&y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
("Hbc5161168c542333633315dee1182227:key_store_seqid=400006&cuyer=r&view=bustomer&order_id=0bb2e248638833d48cb7fed300000f1b&expire=964546263&lobale=en-US&kid=130003.300038&ss=env",
{'Hbc5161168c542333633315dee1182227:key_store_seqid': ['400006'],
'cuyer': ['r'],
'expire': ['964546263'],
'kid': ['130003.300038'],
'lobale': ['en-US'],
'order_id': ['0bb2e248638833d48cb7fed300000f1b'],
'ss': ['env'],
'view': ['bustomer'],
}),
("group_id=5470&set=custom&_assigned_to=31392&_status=1&_category=100&SUBMIT=Browse",
{'SUBMIT': ['Browse'],
'_assigned_to': ['31392'],
'_category': ['100'],
'_status': ['1'],
'group_id': ['5470'],
'set': ['custom'],
})
]
def norm(seq):
return sorted(seq, key=repr)
def first_elts(list):
return [p[0] for p in list]
def first_second_elts(list):
return [(p[0], p[1][0]) for p in list]
def gen_result(data, environ):
encoding = 'latin-1'
fake_stdin = BytesIO(data.encode(encoding))
fake_stdin.seek(0)
form = cgi.FieldStorage(fp=fake_stdin, environ=environ, encoding=encoding)
result = {}
for k, v in dict(form).items():
result[k] = isinstance(v, list) and form.getlist(k) or v.value
return result
class CgiTests(unittest.TestCase):
def test_parse_multipart(self):
fp = BytesIO(POSTDATA.encode('latin1'))
env = {'boundary': BOUNDARY.encode('latin1'),
'CONTENT-LENGTH': '558'}
result = cgi.parse_multipart(fp, env)
expected = {'submit': [' Add '], 'id': ['1234'],
'file': [b'Testing 123.\n'], 'title': ['']}
self.assertEqual(result, expected)
def test_parse_multipart_without_content_length(self):
POSTDATA = '''--JfISa01
Content-Disposition: form-data; name="submit-name"
just a string
--JfISa01--
'''
fp = BytesIO(POSTDATA.encode('latin1'))
env = {'boundary': 'JfISa01'.encode('latin1')}
result = cgi.parse_multipart(fp, env)
expected = {'submit-name': ['just a string\n']}
self.assertEqual(result, expected)
def test_parse_multipart_invalid_encoding(self):
BOUNDARY = "JfISa01"
POSTDATA = """--JfISa01
Content-Disposition: form-data; name="submit-name"
Content-Length: 3
\u2603
--JfISa01"""
fp = BytesIO(POSTDATA.encode('utf8'))
env = {'boundary': BOUNDARY.encode('latin1'),
'CONTENT-LENGTH': str(len(POSTDATA.encode('utf8')))}
result = cgi.parse_multipart(fp, env, encoding="ascii",
errors="surrogateescape")
expected = {'submit-name': ["\udce2\udc98\udc83"]}
self.assertEqual(result, expected)
self.assertEqual("\u2603".encode('utf8'),
result["submit-name"][0].encode('utf8', 'surrogateescape'))
def test_fieldstorage_properties(self):
fs = cgi.FieldStorage()
self.assertFalse(fs)
self.assertIn("FieldStorage", repr(fs))
self.assertEqual(list(fs), list(fs.keys()))
fs.list.append(namedtuple('MockFieldStorage', 'name')('fieldvalue'))
self.assertTrue(fs)
def test_fieldstorage_invalid(self):
self.assertRaises(TypeError, cgi.FieldStorage, "not-a-file-obj",
environ={"REQUEST_METHOD":"PUT"})
self.assertRaises(TypeError, cgi.FieldStorage, "foo", "bar")
fs = cgi.FieldStorage(headers={'content-type':'text/plain'})
self.assertRaises(TypeError, bool, fs)
def test_strict(self):
for orig, expect in parse_strict_test_cases:
# Test basic parsing
d = do_test(orig, "GET")
self.assertEqual(d, expect, "Error parsing %s method GET" % repr(orig))
d = do_test(orig, "POST")
self.assertEqual(d, expect, "Error parsing %s method POST" % repr(orig))
env = {'QUERY_STRING': orig}
fs = cgi.FieldStorage(environ=env)
if isinstance(expect, dict):
# test dict interface
self.assertEqual(len(expect), len(fs))
self.assertCountEqual(expect.keys(), fs.keys())
##self.assertEqual(norm(expect.values()), norm(fs.values()))
##self.assertEqual(norm(expect.items()), norm(fs.items()))
self.assertEqual(fs.getvalue("nonexistent field", "default"), "default")
# test individual fields
for key in expect.keys():
expect_val = expect[key]
self.assertIn(key, fs)
if len(expect_val) > 1:
self.assertEqual(fs.getvalue(key), expect_val)
else:
self.assertEqual(fs.getvalue(key), expect_val[0])
def test_separator(self):
parse_semicolon = [
("x=1;y=2.0", {'x': ['1'], 'y': ['2.0']}),
("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
(";", ValueError("bad query field: ''")),
(";;", ValueError("bad query field: ''")),
("=;a", ValueError("bad query field: 'a'")),
(";b=a", ValueError("bad query field: ''")),
("b;=a", ValueError("bad query field: 'b'")),
("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
("a=a+b;a=b+a", {'a': ['a b', 'b a']}),
]
for orig, expect in parse_semicolon:
env = {'QUERY_STRING': orig}
fs = cgi.FieldStorage(separator=';', environ=env)
if isinstance(expect, dict):
for key in expect.keys():
expect_val = expect[key]
self.assertIn(key, fs)
if len(expect_val) > 1:
self.assertEqual(fs.getvalue(key), expect_val)
else:
self.assertEqual(fs.getvalue(key), expect_val[0])
@warnings_helper.ignore_warnings(category=DeprecationWarning)
def test_log(self):
cgi.log("Testing")
cgi.logfp = StringIO()
cgi.initlog("%s", "Testing initlog 1")
cgi.log("%s", "Testing log 2")
self.assertEqual(cgi.logfp.getvalue(), "Testing initlog 1\nTesting log 2\n")
if os.path.exists(os.devnull):
cgi.logfp = None
cgi.logfile = os.devnull
cgi.initlog("%s", "Testing log 3")
self.addCleanup(cgi.closelog)
cgi.log("Testing log 4")
def test_fieldstorage_readline(self):
# FieldStorage uses readline, which has the capacity to read all
# contents of the input file into memory; we use readline's size argument
# to prevent that for files that do not contain any newlines in
# non-GET/HEAD requests
class TestReadlineFile:
def __init__(self, file):
self.file = file
self.numcalls = 0
def readline(self, size=None):
self.numcalls += 1
if size:
return self.file.readline(size)
else:
return self.file.readline()
def __getattr__(self, name):
file = self.__dict__['file']
a = getattr(file, name)
if not isinstance(a, int):
setattr(self, name, a)
return a
f = TestReadlineFile(tempfile.TemporaryFile("wb+"))
self.addCleanup(f.close)
f.write(b'x' * 256 * 1024)
f.seek(0)
env = {'REQUEST_METHOD':'PUT'}
fs = cgi.FieldStorage(fp=f, environ=env)
self.addCleanup(fs.file.close)
# if we're not chunking properly, readline is only called twice
# (by read_binary); if we are chunking properly, it will be called 5 times
# as long as the chunksize is 1 << 16.
self.assertGreater(f.numcalls, 2)
f.close()
def test_fieldstorage_multipart(self):
#Test basic FieldStorage multipart parsing
env = {
'REQUEST_METHOD': 'POST',
'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY),
'CONTENT_LENGTH': '558'}
fp = BytesIO(POSTDATA.encode('latin-1'))
fs = cgi.FieldStorage(fp, environ=env, encoding="latin-1")
self.assertEqual(len(fs.list), 4)
expect = [{'name':'id', 'filename':None, 'value':'1234'},
{'name':'title', 'filename':None, 'value':''},
{'name':'file', 'filename':'test.txt', 'value':b'Testing 123.\n'},
{'name':'submit', 'filename':None, 'value':' Add '}]
for x in range(len(fs.list)):
for k, exp in expect[x].items():
got = getattr(fs.list[x], k)
self.assertEqual(got, exp)
def test_fieldstorage_multipart_leading_whitespace(self):
env = {
'REQUEST_METHOD': 'POST',
'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY),
'CONTENT_LENGTH': '560'}
# Add some leading whitespace to our post data that will cause the
# first line to not be the innerboundary.
fp = BytesIO(b"\r\n" + POSTDATA.encode('latin-1'))
fs = cgi.FieldStorage(fp, environ=env, encoding="latin-1")
self.assertEqual(len(fs.list), 4)
expect = [{'name':'id', 'filename':None, 'value':'1234'},
{'name':'title', 'filename':None, 'value':''},
{'name':'file', 'filename':'test.txt', 'value':b'Testing 123.\n'},
{'name':'submit', 'filename':None, 'value':' Add '}]
for x in range(len(fs.list)):
for k, exp in expect[x].items():
got = getattr(fs.list[x], k)
self.assertEqual(got, exp)
def test_fieldstorage_multipart_non_ascii(self):
#Test basic FieldStorage multipart parsing
env = {'REQUEST_METHOD':'POST',
'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY),
'CONTENT_LENGTH':'558'}
for encoding in ['iso-8859-1','utf-8']:
fp = BytesIO(POSTDATA_NON_ASCII.encode(encoding))
fs = cgi.FieldStorage(fp, environ=env,encoding=encoding)
self.assertEqual(len(fs.list), 1)
expect = [{'name':'id', 'filename':None, 'value':'\xe7\xf1\x80'}]
for x in range(len(fs.list)):
for k, exp in expect[x].items():
got = getattr(fs.list[x], k)
self.assertEqual(got, exp)
def test_fieldstorage_multipart_maxline(self):
# Issue #18167
maxline = 1 << 16
self.maxDiff = None
def check(content):
data = """---123
Content-Disposition: form-data; name="upload"; filename="fake.txt"
Content-Type: text/plain
%s
---123--
""".replace('\n', '\r\n') % content
environ = {
'CONTENT_LENGTH': str(len(data)),
'CONTENT_TYPE': 'multipart/form-data; boundary=-123',
'REQUEST_METHOD': 'POST',
}
self.assertEqual(gen_result(data, environ),
{'upload': content.encode('latin1')})
check('x' * (maxline - 1))
check('x' * (maxline - 1) + '\r')
check('x' * (maxline - 1) + '\r' + 'y' * (maxline - 1))
def test_fieldstorage_multipart_w3c(self):
# Test basic FieldStorage multipart parsing (W3C sample)
env = {
'REQUEST_METHOD': 'POST',
'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY_W3),
'CONTENT_LENGTH': str(len(POSTDATA_W3))}
fp = BytesIO(POSTDATA_W3.encode('latin-1'))
fs = cgi.FieldStorage(fp, environ=env, encoding="latin-1")
self.assertEqual(len(fs.list), 2)
self.assertEqual(fs.list[0].name, 'submit-name')
self.assertEqual(fs.list[0].value, 'Larry')
self.assertEqual(fs.list[1].name, 'files')
files = fs.list[1].value
self.assertEqual(len(files), 2)
expect = [{'name': None, 'filename': 'file1.txt', 'value': b'... contents of file1.txt ...'},
{'name': None, 'filename': 'file2.gif', 'value': b'...contents of file2.gif...'}]
for x in range(len(files)):
for k, exp in expect[x].items():
got = getattr(files[x], k)
self.assertEqual(got, exp)
def test_fieldstorage_part_content_length(self):
BOUNDARY = "JfISa01"
POSTDATA = """--JfISa01
Content-Disposition: form-data; name="submit-name"
Content-Length: 5
Larry
--JfISa01"""
env = {
'REQUEST_METHOD': 'POST',
'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY),
'CONTENT_LENGTH': str(len(POSTDATA))}
fp = BytesIO(POSTDATA.encode('latin-1'))
fs = cgi.FieldStorage(fp, environ=env, encoding="latin-1")
self.assertEqual(len(fs.list), 1)
self.assertEqual(fs.list[0].name, 'submit-name')
self.assertEqual(fs.list[0].value, 'Larry')
def test_field_storage_multipart_no_content_length(self):
fp = BytesIO(b"""--MyBoundary
Content-Disposition: form-data; name="my-arg"; filename="foo"
Test
--MyBoundary--
""")
env = {
"REQUEST_METHOD": "POST",
"CONTENT_TYPE": "multipart/form-data; boundary=MyBoundary",
"wsgi.input": fp,
}
fields = cgi.FieldStorage(fp, environ=env)
self.assertEqual(len(fields["my-arg"].file.read()), 5)
def test_fieldstorage_as_context_manager(self):
fp = BytesIO(b'x' * 10)
env = {'REQUEST_METHOD': 'PUT'}
with cgi.FieldStorage(fp=fp, environ=env) as fs:
content = fs.file.read()
self.assertFalse(fs.file.closed)
self.assertTrue(fs.file.closed)
self.assertEqual(content, 'x' * 10)
with self.assertRaisesRegex(ValueError, 'I/O operation on closed file'):
fs.file.read()
_qs_result = {
'key1': 'value1',
'key2': ['value2x', 'value2y'],
'key3': 'value3',
'key4': 'value4'
}
def testQSAndUrlEncode(self):
data = "key2=value2x&key3=value3&key4=value4"
environ = {
'CONTENT_LENGTH': str(len(data)),
'CONTENT_TYPE': 'application/x-www-form-urlencoded',
'QUERY_STRING': 'key1=value1&key2=value2y',
'REQUEST_METHOD': 'POST',
}
v = gen_result(data, environ)
self.assertEqual(self._qs_result, v)
def test_max_num_fields(self):
# For application/x-www-form-urlencoded
data = '&'.join(['a=a']*11)
environ = {
'CONTENT_LENGTH': str(len(data)),
'CONTENT_TYPE': 'application/x-www-form-urlencoded',
'REQUEST_METHOD': 'POST',
}
with self.assertRaises(ValueError):
cgi.FieldStorage(
fp=BytesIO(data.encode()),
environ=environ,
max_num_fields=10,
)
# For multipart/form-data
data = """---123
Content-Disposition: form-data; name="a"
3
---123
Content-Type: application/x-www-form-urlencoded
a=4
---123
Content-Type: application/x-www-form-urlencoded
a=5
---123--
"""
environ = {
'CONTENT_LENGTH': str(len(data)),
'CONTENT_TYPE': 'multipart/form-data; boundary=-123',
'QUERY_STRING': 'a=1&a=2',
'REQUEST_METHOD': 'POST',
}
# 2 GET entities
# 1 top level POST entities
# 1 entity within the second POST entity
# 1 entity within the third POST entity
with self.assertRaises(ValueError):
cgi.FieldStorage(
fp=BytesIO(data.encode()),
environ=environ,
max_num_fields=4,
)
cgi.FieldStorage(
fp=BytesIO(data.encode()),
environ=environ,
max_num_fields=5,
)
def testQSAndFormData(self):
data = """---123
Content-Disposition: form-data; name="key2"
value2y
---123
Content-Disposition: form-data; name="key3"
value3
---123
Content-Disposition: form-data; name="key4"
value4
---123--
"""
environ = {
'CONTENT_LENGTH': str(len(data)),
'CONTENT_TYPE': 'multipart/form-data; boundary=-123',
'QUERY_STRING': 'key1=value1&key2=value2x',
'REQUEST_METHOD': 'POST',
}
v = gen_result(data, environ)
self.assertEqual(self._qs_result, v)
def testQSAndFormDataFile(self):
data = """---123
Content-Disposition: form-data; name="key2"
value2y
---123
Content-Disposition: form-data; name="key3"
value3
---123
Content-Disposition: form-data; name="key4"
value4
---123
Content-Disposition: form-data; name="upload"; filename="fake.txt"
Content-Type: text/plain
this is the content of the fake file
---123--
"""
environ = {
'CONTENT_LENGTH': str(len(data)),
'CONTENT_TYPE': 'multipart/form-data; boundary=-123',
'QUERY_STRING': 'key1=value1&key2=value2x',
'REQUEST_METHOD': 'POST',
}
result = self._qs_result.copy()
result.update({
'upload': b'this is the content of the fake file\n'
})
v = gen_result(data, environ)
self.assertEqual(result, v)
def test_parse_header(self):
self.assertEqual(
cgi.parse_header("text/plain"),
("text/plain", {}))
self.assertEqual(
cgi.parse_header("text/vnd.just.made.this.up ; "),
("text/vnd.just.made.this.up", {}))
self.assertEqual(
cgi.parse_header("text/plain;charset=us-ascii"),
("text/plain", {"charset": "us-ascii"}))
self.assertEqual(
cgi.parse_header('text/plain ; charset="us-ascii"'),
("text/plain", {"charset": "us-ascii"}))
self.assertEqual(
cgi.parse_header('text/plain ; charset="us-ascii"; another=opt'),
("text/plain", {"charset": "us-ascii", "another": "opt"}))
self.assertEqual(
cgi.parse_header('attachment; filename="silly.txt"'),
("attachment", {"filename": "silly.txt"}))
self.assertEqual(
cgi.parse_header('attachment; filename="strange;name"'),
("attachment", {"filename": "strange;name"}))
self.assertEqual(
cgi.parse_header('attachment; filename="strange;name";size=123;'),
("attachment", {"filename": "strange;name", "size": "123"}))
self.assertEqual(
cgi.parse_header('form-data; name="files"; filename="fo\\"o;bar"'),
("form-data", {"name": "files", "filename": 'fo"o;bar'}))
def test_all(self):
not_exported = {
"logfile", "logfp", "initlog", "dolog", "nolog", "closelog", "log",
"maxlen", "valid_boundary"}
support.check__all__(self, cgi, not_exported=not_exported)
BOUNDARY = "---------------------------721837373350705526688164684"
POSTDATA = """-----------------------------721837373350705526688164684
Content-Disposition: form-data; name="id"
1234
-----------------------------721837373350705526688164684
Content-Disposition: form-data; name="title"
-----------------------------721837373350705526688164684
Content-Disposition: form-data; name="file"; filename="test.txt"
Content-Type: text/plain
Testing 123.
-----------------------------721837373350705526688164684
Content-Disposition: form-data; name="submit"
Add\x20
-----------------------------721837373350705526688164684--
"""
POSTDATA_NON_ASCII = """-----------------------------721837373350705526688164684
Content-Disposition: form-data; name="id"
\xe7\xf1\x80
-----------------------------721837373350705526688164684
"""
# http://www.w3.org/TR/html401/interact/forms.html#h-17.13.4
BOUNDARY_W3 = "AaB03x"
POSTDATA_W3 = """--AaB03x
Content-Disposition: form-data; name="submit-name"
Larry
--AaB03x
Content-Disposition: form-data; name="files"
Content-Type: multipart/mixed; boundary=BbC04y
--BbC04y
Content-Disposition: file; filename="file1.txt"
Content-Type: text/plain
... contents of file1.txt ...
--BbC04y
Content-Disposition: file; filename="file2.gif"
Content-Type: image/gif
Content-Transfer-Encoding: binary
...contents of file2.gif...
--BbC04y--
--AaB03x--
"""
if __name__ == '__main__':
unittest.main()