Development of an internal social media platform with personalised dashboards for students
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

message.py 11KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343
# -*- coding: utf-8 -*-
  2. #
  3. # This file is part of gunicorn released under the MIT license.
  4. # See the NOTICE for more information.
  5. import re
  6. import socket
  7. from errno import ENOTCONN
  8. from gunicorn._compat import bytes_to_str
  9. from gunicorn.http.unreader import SocketUnreader
  10. from gunicorn.http.body import ChunkedReader, LengthReader, EOFReader, Body
  11. from gunicorn.http.errors import (InvalidHeader, InvalidHeaderName, NoMoreData,
  12. InvalidRequestLine, InvalidRequestMethod, InvalidHTTPVersion,
  13. LimitRequestLine, LimitRequestHeaders)
  14. from gunicorn.http.errors import InvalidProxyLine, ForbiddenProxyRequest
  15. from gunicorn.six import BytesIO
  16. from gunicorn._compat import urlsplit
  17. MAX_REQUEST_LINE = 8190
  18. MAX_HEADERS = 32768
  19. MAX_HEADERFIELD_SIZE = 8190
  20. HEADER_RE = re.compile("[\x00-\x1F\x7F()<>@,;:\[\]={} \t\\\\\"]")
  21. METH_RE = re.compile(r"[A-Z0-9$-_.]{3,20}")
  22. VERSION_RE = re.compile(r"HTTP/(\d+).(\d+)")
  23. class Message(object):
  24. def __init__(self, cfg, unreader):
  25. self.cfg = cfg
  26. self.unreader = unreader
  27. self.version = None
  28. self.headers = []
  29. self.trailers = []
  30. self.body = None
  31. # set headers limits
  32. self.limit_request_fields = cfg.limit_request_fields
  33. if (self.limit_request_fields <= 0
  34. or self.limit_request_fields > MAX_HEADERS):
  35. self.limit_request_fields = MAX_HEADERS
  36. self.limit_request_field_size = cfg.limit_request_field_size
  37. if (self.limit_request_field_size < 0
  38. or self.limit_request_field_size > MAX_HEADERFIELD_SIZE):
  39. self.limit_request_field_size = MAX_HEADERFIELD_SIZE
  40. # set max header buffer size
  41. max_header_field_size = self.limit_request_field_size or MAX_HEADERFIELD_SIZE
  42. self.max_buffer_headers = self.limit_request_fields * \
  43. (max_header_field_size + 2) + 4
  44. unused = self.parse(self.unreader)
  45. self.unreader.unread(unused)
  46. self.set_body_reader()
  47. def parse(self):
  48. raise NotImplementedError()
  49. def parse_headers(self, data):
  50. headers = []
  51. # Split lines on \r\n keeping the \r\n on each line
  52. lines = [bytes_to_str(line) + "\r\n" for line in data.split(b"\r\n")]
  53. # Parse headers into key/value pairs paying attention
  54. # to continuation lines.
  55. while len(lines):
  56. if len(headers) >= self.limit_request_fields:
  57. raise LimitRequestHeaders("limit request headers fields")
  58. # Parse initial header name : value pair.
  59. curr = lines.pop(0)
  60. header_length = len(curr)
  61. if curr.find(":") < 0:
  62. raise InvalidHeader(curr.strip())
  63. name, value = curr.split(":", 1)
  64. name = name.rstrip(" \t").upper()
  65. if HEADER_RE.search(name):
  66. raise InvalidHeaderName(name)
  67. name, value = name.strip(), [value.lstrip()]
  68. # Consume value continuation lines
  69. while len(lines) and lines[0].startswith((" ", "\t")):
  70. curr = lines.pop(0)
  71. header_length += len(curr)
  72. if header_length > self.limit_request_field_size > 0:
  73. raise LimitRequestHeaders("limit request headers "
  74. + "fields size")
  75. value.append(curr)
  76. value = ''.join(value).rstrip()
  77. if header_length > self.limit_request_field_size > 0:
  78. raise LimitRequestHeaders("limit request headers fields size")
  79. headers.append((name, value))
  80. return headers
  81. def set_body_reader(self):
  82. chunked = False
  83. content_length = None
  84. for (name, value) in self.headers:
  85. if name == "CONTENT-LENGTH":
  86. content_length = value
  87. elif name == "TRANSFER-ENCODING":
  88. chunked = value.lower() == "chunked"
  89. elif name == "SEC-WEBSOCKET-KEY1":
  90. content_length = 8
  91. if chunked:
  92. self.body = Body(ChunkedReader(self, self.unreader))
  93. elif content_length is not None:
  94. try:
  95. content_length = int(content_length)
  96. except ValueError:
  97. raise InvalidHeader("CONTENT-LENGTH", req=self)
  98. if content_length < 0:
  99. raise InvalidHeader("CONTENT-LENGTH", req=self)
  100. self.body = Body(LengthReader(self.unreader, content_length))
  101. else:
  102. self.body = Body(EOFReader(self.unreader))
  103. def should_close(self):
  104. for (h, v) in self.headers:
  105. if h == "CONNECTION":
  106. v = v.lower().strip()
  107. if v == "close":
  108. return True
  109. elif v == "keep-alive":
  110. return False
  111. break
  112. return self.version <= (1, 0)
  113. class Request(Message):
  114. def __init__(self, cfg, unreader, req_number=1):
  115. self.method = None
  116. self.uri = None
  117. self.path = None
  118. self.query = None
  119. self.fragment = None
  120. # get max request line size
  121. self.limit_request_line = cfg.limit_request_line
  122. if (self.limit_request_line < 0
  123. or self.limit_request_line >= MAX_REQUEST_LINE):
  124. self.limit_request_line = MAX_REQUEST_LINE
  125. self.req_number = req_number
  126. self.proxy_protocol_info = None
  127. super(Request, self).__init__(cfg, unreader)
  128. def get_data(self, unreader, buf, stop=False):
  129. data = unreader.read()
  130. if not data:
  131. if stop:
  132. raise StopIteration()
  133. raise NoMoreData(buf.getvalue())
  134. buf.write(data)
  135. def parse(self, unreader):
  136. buf = BytesIO()
  137. self.get_data(unreader, buf, stop=True)
  138. # get request line
  139. line, rbuf = self.read_line(unreader, buf, self.limit_request_line)
  140. # proxy protocol
  141. if self.proxy_protocol(bytes_to_str(line)):
  142. # get next request line
  143. buf = BytesIO()
  144. buf.write(rbuf)
  145. line, rbuf = self.read_line(unreader, buf, self.limit_request_line)
  146. self.parse_request_line(bytes_to_str(line))
  147. buf = BytesIO()
  148. buf.write(rbuf)
  149. # Headers
  150. data = buf.getvalue()
  151. idx = data.find(b"\r\n\r\n")
  152. done = data[:2] == b"\r\n"
  153. while True:
  154. idx = data.find(b"\r\n\r\n")
  155. done = data[:2] == b"\r\n"
  156. if idx < 0 and not done:
  157. self.get_data(unreader, buf)
  158. data = buf.getvalue()
  159. if len(data) > self.max_buffer_headers:
  160. raise LimitRequestHeaders("max buffer headers")
  161. else:
  162. break
  163. if done:
  164. self.unreader.unread(data[2:])
  165. return b""
  166. self.headers = self.parse_headers(data[:idx])
  167. ret = data[idx + 4:]
  168. buf = BytesIO()
  169. return ret
  170. def read_line(self, unreader, buf, limit=0):
  171. data = buf.getvalue()
  172. while True:
  173. idx = data.find(b"\r\n")
  174. if idx >= 0:
  175. # check if the request line is too large
  176. if idx > limit > 0:
  177. raise LimitRequestLine(idx, limit)
  178. break
  179. elif len(data) - 2 > limit > 0:
  180. raise LimitRequestLine(len(data), limit)
  181. self.get_data(unreader, buf)
  182. data = buf.getvalue()
  183. return (data[:idx], # request line,
  184. data[idx + 2:]) # residue in the buffer, skip \r\n
  185. def proxy_protocol(self, line):
  186. """\
  187. Detect, check and parse proxy protocol.
  188. :raises: ForbiddenProxyRequest, InvalidProxyLine.
  189. :return: True for proxy protocol line else False
  190. """
  191. if not self.cfg.proxy_protocol:
  192. return False
  193. if self.req_number != 1:
  194. return False
  195. if not line.startswith("PROXY"):
  196. return False
  197. self.proxy_protocol_access_check()
  198. self.parse_proxy_protocol(line)
  199. return True
  200. def proxy_protocol_access_check(self):
  201. # check in allow list
  202. if isinstance(self.unreader, SocketUnreader):
  203. try:
  204. remote_host = self.unreader.sock.getpeername()[0]
  205. except socket.error as e:
  206. if e.args[0] == ENOTCONN:
  207. raise ForbiddenProxyRequest("UNKNOW")
  208. raise
  209. if ("*" not in self.cfg.proxy_allow_ips and
  210. remote_host not in self.cfg.proxy_allow_ips):
  211. raise ForbiddenProxyRequest(remote_host)
  212. def parse_proxy_protocol(self, line):
  213. bits = line.split()
  214. if len(bits) != 6:
  215. raise InvalidProxyLine(line)
  216. # Extract data
  217. proto = bits[1]
  218. s_addr = bits[2]
  219. d_addr = bits[3]
  220. # Validation
  221. if proto not in ["TCP4", "TCP6"]:
  222. raise InvalidProxyLine("protocol '%s' not supported" % proto)
  223. if proto == "TCP4":
  224. try:
  225. socket.inet_pton(socket.AF_INET, s_addr)
  226. socket.inet_pton(socket.AF_INET, d_addr)
  227. except socket.error:
  228. raise InvalidProxyLine(line)
  229. elif proto == "TCP6":
  230. try:
  231. socket.inet_pton(socket.AF_INET6, s_addr)
  232. socket.inet_pton(socket.AF_INET6, d_addr)
  233. except socket.error:
  234. raise InvalidProxyLine(line)
  235. try:
  236. s_port = int(bits[4])
  237. d_port = int(bits[5])
  238. except ValueError:
  239. raise InvalidProxyLine("invalid port %s" % line)
  240. if not ((0 <= s_port <= 65535) and (0 <= d_port <= 65535)):
  241. raise InvalidProxyLine("invalid port %s" % line)
  242. # Set data
  243. self.proxy_protocol_info = {
  244. "proxy_protocol": proto,
  245. "client_addr": s_addr,
  246. "client_port": s_port,
  247. "proxy_addr": d_addr,
  248. "proxy_port": d_port
  249. }
  250. def parse_request_line(self, line):
  251. bits = line.split(None, 2)
  252. if len(bits) != 3:
  253. raise InvalidRequestLine(line)
  254. # Method
  255. if not METH_RE.match(bits[0]):
  256. raise InvalidRequestMethod(bits[0])
  257. self.method = bits[0].upper()
  258. # URI
  259. # When the path starts with //, urlsplit considers it as a
  260. # relative uri while the RDF says it shouldnt
  261. # http://www.w3.org/Protocols/rfc2616/rfc2616-sec5.html#sec5.1.2
  262. # considers it as an absolute url.
  263. # fix issue #297
  264. if bits[1].startswith("//"):
  265. self.uri = bits[1][1:]
  266. else:
  267. self.uri = bits[1]
  268. try:
  269. parts = urlsplit(self.uri)
  270. except ValueError:
  271. raise InvalidRequestLine(line)
  272. self.path = parts.path or ""
  273. self.query = parts.query or ""
  274. self.fragment = parts.fragment or ""
  275. # Version
  276. match = VERSION_RE.match(bits[2])
  277. if match is None:
  278. raise InvalidHTTPVersion(bits[2])
  279. self.version = (int(match.group(1)), int(match.group(2)))
  280. def set_body_reader(self):
  281. super(Request, self).set_body_reader()
  282. if isinstance(self.body.reader, EOFReader):
  283. self.body = Body(LengthReader(self.unreader, 0))