RequestParser.cc
Go to the documentation of this file.
1 /*
2  * Copyright (C) 1996-2023 The Squid Software Foundation and contributors
3  *
4  * Squid software is distributed under GPLv2+ license and includes
5  * contributions from numerous individuals and organizations.
6  * Please see the COPYING and CONTRIBUTORS files for details.
7  */
8 
9 #include "squid.h"
10 #include "debug/Stream.h"
11 #include "http/one/RequestParser.h"
12 #include "http/ProtocolVersion.h"
13 #include "parser/Tokenizer.h"
14 #include "SquidConfig.h"
15 
// Returns a size for the request-line computed from the parsed fields: the
// method image length, the URI length, and 12 further bytes (two SP
// delimiters, the 8-byte "HTTP/x.y" version and the 2-byte CRLF terminator).
// NOTE(review): the return-type and name lines for this body are absent from
// this listing; presumably this is firstLineSize() -- confirm.
// NOTE(review): the constant 12 assumes single-SP delimiters, a present
// version field and a CRLF terminator; confirm callers tolerate the estimate
// for relaxed or HTTP/0.9 request-lines.
18 {
19  // RFC 7230 section 2.6
20  /* method SP request-target SP "HTTP/" DIGIT "." DIGIT CRLF */
21  return method_.image().length() + uri_.length() + 12;
22 }
23 
// Removes empty lines (bare LF or CRLF) from the front of buf_ so that the
// request-line starts at the first byte of buf_.
// NOTE(review): the function-name line and, judging by the extra closing
// brace near the end, an enclosing conditional are absent from this listing;
// presumably this is skipGarbageLines() guarded by relaxed_header_parser --
// confirm.
37 void
39 {
// A negative relaxed_header_parser value also logs a warning when the buffer
// starts with CR or LF.
// NOTE(review): buf_[0] is read here without the isEmpty() test that the
// loop below performs first; confirm this is safe for an empty buf_.
41  if (Config.onoff.relaxed_header_parser < 0 && (buf_[0] == '\r' || buf_[0] == '\n'))
42  debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
43  "CRLF bytes received ahead of request-line. " <<
44  "Ignored due to relaxed_header_parser.");
45  // Be tolerant of prefix empty lines
46  // ie any series of either \n or \r\n with no other characters and no repeated \r
// One byte is consumed per iteration: a CR is only dropped when an LF
// immediately follows it, and that LF is dropped by the next iteration.
47  while (!buf_.isEmpty() && (buf_[0] == '\n' ||
48  (buf_[0] == '\r' && buf_.length() > 1 && buf_[1] == '\n'))) {
49  buf_.consume(1);
50  }
51  }
52 }
53 
// Parses the method at the front of the request-line, then the delimiter(s)
// that follow it.
// On success stores the method in method_ and returns true. On a missing or
// malformed method sets parseStatusCode to Http::scBadRequest and returns
// false; a bad delimiter count is reported by skipDelimiter().
// NOTE(review): the declarator line is absent from this listing; the body
// uses a Tokenizer named tok, presumably parseMethodField(Tokenizer &tok) --
// confirm.
61 bool
63 {
64  // method field is a sequence of TCHAR.
65  // Limit to 32 characters to prevent overly long sequences of non-HTTP
66  // being sucked in before mismatch is detected. 32 is itself annoyingly
67  // big but there are methods registered by IANA that reach 17 bytes:
68  // http://www.iana.org/assignments/http-methods
69  static const size_t maxMethodLength = 32; // TODO: make this configurable?
70 
71  SBuf methodFound;
// Fails when no TCHAR prefix can be extracted within the length limit.
72  if (!tok.prefix(methodFound, CharacterSet::TCHAR, maxMethodLength)) {
73  debugs(33, ErrorLevel(), "ERROR: invalid request-line: missing or malformed method");
74  parseStatusCode = Http::scBadRequest;
75  return false;
76  }
77  method_ = HttpRequestMethod(methodFound);
78 
// The number of delimiter characters skipped is validated by skipDelimiter().
79  if (!skipDelimiter(tok.skipAll(DelimiterCharacters()), "after method"))
80  return false;
81 
82  return true;
83 }
84 
// Builds (once, as a function-local static) and returns the set of
// characters treated as valid inside a URI, as a union of the RFC 3986
// character groups listed below.
// NOTE(review): the function-name line is absent from this listing, and so
// are the operand that should follow the "unreserved characters" comment and
// the operand after the final '+'; the expression is incomplete as shown.
// Presumably this is UriValidCharacters() -- confirm against the original
// file before editing.
86 static const CharacterSet &
88 {
89  /* RFC 3986 section 2:
90  * "
91  * A URI is composed from a limited set of characters consisting of
92  * digits, letters, and a few graphic symbols.
93  * "
94  */
95  static const CharacterSet UriChars =
96  CharacterSet("URI-Chars","") +
97  // RFC 3986 section 2.2 - reserved characters
98  CharacterSet("gen-delims", ":/?#[]@") +
99  CharacterSet("sub-delims", "!$&'()*+,;=") +
100  // RFC 3986 section 2.3 - unreserved characters
102  // RFC 3986 section 2.1 - percent encoding "%" HEXDIG
103  CharacterSet("pct-encoded", "%") +
105 
106  return UriChars;
107 }
108 
// Returns the set of characters accepted in the request-target field.
// The strict path at the bottom returns UriValidCharacters() unchanged; the
// branches above it return wider sets that also include the delimiter
// characters and, when USE_HTTP_VIOLATIONS is defined, the RFC 2396 "unwise"
// characters and bytes 128-255.
// NOTE(review): the function-name line, the line opening the block that the
// lone closing brace ends, and the first operand of each static initializer
// are absent from this listing; presumably the block is guarded by
// relaxed_header_parser and each set starts from UriValidCharacters() --
// confirm.
110 const CharacterSet &
112 {
114 #if USE_HTTP_VIOLATIONS
115  static const CharacterSet RelaxedExtended =
117  // accept whitespace (extended), it will be dealt with later
118  DelimiterCharacters() +
119  // RFC 2396 unwise character set which must never be transmitted
120  // in un-escaped form. But many web services do anyway.
121  CharacterSet("RFC2396-unwise","\"\\|^<>`{}") +
122  // UTF-8 because we want to be future-proof
123  CharacterSet("UTF-8", 128, 255);
124 
125  return RelaxedExtended;
126 #else
127  static const CharacterSet RelaxedCompliant =
129  // accept whitespace (extended), it will be dealt with later.
130  DelimiterCharacters();
131 
132  return RelaxedCompliant;
133 #endif
134  }
135 
136  // strict parse only accepts what the RFC say we can
137  return UriValidCharacters();
138 }
139 
// Extracts the request-target from the front of tok and stores it in uri_.
// Returns true on success. Returns false with parseStatusCode set to
// Http::scBadRequest when no acceptable characters are found, or to
// Http::scUriTooLong when the extracted URI exceeds maxUriLength.
// NOTE(review): the declarator line is absent from this listing; the body
// uses a Tokenizer named tok, presumably parseUriField(Tokenizer &tok) --
// confirm.
140 bool
142 {
143  /* Arbitrary 64KB URI upper length limit.
144  *
145  * Not quite as arbitrary as it seems though. Old SquidString objects
146  * cannot store strings larger than 64KB, so we must limit until they
147  * have all been replaced with SBuf.
148  *
149  * Not that it matters but RFC 7230 section 3.1.1 requires (RECOMMENDED)
150  * at least 8000 octets for the whole line, including method and version.
151  */
152  const size_t maxUriLength = static_cast<size_t>((64*1024)-1);
153 
154  SBuf uriFound;
// prefix() is called without a length limit here; the size check is done
// afterwards on the extracted value.
155  if (!tok.prefix(uriFound, RequestTargetCharacters())) {
156  parseStatusCode = Http::scBadRequest;
157  debugs(33, ErrorLevel(), "ERROR: invalid request-line: missing or malformed URI");
158  return false;
159  }
160 
161  if (uriFound.length() > maxUriLength) {
162  // RFC 7230 section 3.1.1 mandatory (MUST) 414 response
163  parseStatusCode = Http::scUriTooLong;
164  debugs(33, ErrorLevel(), "ERROR: invalid request-line: " << uriFound.length() <<
165  "-byte URI exceeds " << maxUriLength << "-byte limit");
166  return false;
167  }
168 
169  uri_ = uriFound;
170  return true;
171 }
172 
// Parses the HTTP-version label from the END of tok (all operations used
// here are suffix/trailing ones) and stores the result in msgProtocol_.
// Returns true when a version was recognized, or when the method is GET and
// the line is therefore treated as HTTP/0.9. Otherwise sets parseStatusCode
// to Http::scBadRequest and returns false.
// NOTE(review): the declarator line is absent from this listing; the body
// uses a Tokenizer named tok, presumably parseHttpVersionField(Tokenizer
// &tok) -- confirm.
173 bool
175 {
176  static const SBuf http1p0("HTTP/1.0");
177  static const SBuf http1p1("HTTP/1.1");
// Snapshot of the tokenizer, restored by the HTTP/0.9 fallback below because
// the generic attempt may consume trailing bytes before it fails.
178  const auto savedTok = tok;
179 
180  // Optimization: Expect (and quickly parse) HTTP/1.1 or HTTP/1.0 in
181  // the vast majority of cases.
182  if (tok.skipSuffix(http1p1)) {
183  msgProtocol_ = Http::ProtocolVersion(1, 1);
184  return true;
185  } else if (tok.skipSuffix(http1p0)) {
186  msgProtocol_ = Http::ProtocolVersion(1, 0);
187  return true;
188  } else {
189  // RFC 7230 section 2.6:
190  // HTTP-version = HTTP-name "/" DIGIT "." DIGIT
191  static const CharacterSet period("Decimal point", ".");
192  static const SBuf proto("HTTP/");
193  SBuf majorDigit;
194  SBuf minorDigit;
// Read right-to-left: minor digits, the '.', major digits, then "HTTP/".
195  if (tok.suffix(minorDigit, CharacterSet::DIGIT) &&
196  tok.skipOneTrailing(period) &&
197  tok.suffix(majorDigit, CharacterSet::DIGIT) &&
198  tok.skipSuffix(proto)) {
199  const bool multiDigits = majorDigit.length() > 1 || minorDigit.length() > 1;
200  // use '0.0' for unsupported multiple digit version numbers
201  const unsigned int major = multiDigits ? 0 : (*majorDigit.rawContent() - '0');
202  const unsigned int minor = multiDigits ? 0 : (*minorDigit.rawContent() - '0');
203  msgProtocol_ = Http::ProtocolVersion(major, minor);
204  return true;
205  }
206  }
207 
208  // A GET request might use HTTP/0.9 syntax
209  if (method_ == Http::METHOD_GET) {
210  // RFC 1945 - no HTTP version field at all
211  tok = savedTok; // in case the URI ends with a digit
212  // report this assumption as an error if configured to triage parsing
213  debugs(33, ErrorLevel(), "assuming HTTP/0.9 request-line");
214  msgProtocol_ = Http::ProtocolVersion(0,9);
215  return true;
216  }
217 
218  debugs(33, ErrorLevel(), "ERROR: invalid request-line: not HTTP");
219  parseStatusCode = Http::scBadRequest;
220  return false;
221 }
222 
228 bool
229 Http::One::RequestParser::skipDelimiter(const size_t count, const char *where)
230 {
231  if (count <= 0) {
232  debugs(33, ErrorLevel(), "ERROR: invalid request-line: missing delimiter " << where);
233  parseStatusCode = Http::scBadRequest;
234  return false;
235  }
236 
237  // tolerant parser allows multiple whitespace characters between request-line fields
238  if (count > 1 && !Config.onoff.relaxed_header_parser) {
239  debugs(33, ErrorLevel(), "ERROR: invalid request-line: too many delimiters " << where);
240  parseStatusCode = Http::scBadRequest;
241  return false;
242  }
243 
244  return true;
245 }
246 
// Handles the CR bytes at the end of the request-line, just before the
// terminating LF. One branch accepts any number of trailing CRs (including
// none); the other requires exactly one to be skippable and otherwise sets
// parseStatusCode to Http::scBadRequest and returns false.
// NOTE(review): the declarator line and the line opening the first branch
// are absent from this listing; presumably this is skipTrailingCrs(Tokenizer
// &tok) and the first branch is taken when relaxed_header_parser is set --
// confirm.
248 bool
250 {
252  (void)tok.skipAllTrailing(CharacterSet::CR); // optional; multiple OK
253  } else {
254  if (!tok.skipOneTrailing(CharacterSet::CR)) {
255  debugs(33, ErrorLevel(), "ERROR: invalid request-line: missing CR before LF");
256  parseStatusCode = Http::scBadRequest;
257  return false;
258  }
259  }
260  return true;
261 }
262 
// Attempts to parse one request-line from the front of buf_.
// Returns 1 when a complete line was parsed (parseStatusCode is set to
// Http::scOkay and buf_ is advanced past the line), 0 when no LF-terminated
// line is buffered yet, and -1 on a parsing error (parseStatusCode is set by
// this function or by the field parser that failed).
// NOTE(review): the declarator line is absent from this listing; the caller
// in this file invokes parseRequestFirstLine() with no arguments.
274 int
276 {
277  debugs(74, 5, "parsing possible request: buf.length=" << buf_.length());
278  debugs(74, DBG_DATA, buf_);
279 
280  SBuf line;
281 
282  // Earlier, skipGarbageLines() took care of any leading LFs (if allowed).
283  // Now, the request line has to end at the first LF.
284  static const CharacterSet lineChars = CharacterSet::LF.complement("notLF");
285  Tokenizer lineTok(buf_);
286  if (!lineTok.prefix(line, lineChars) || !lineTok.skip('\n')) {
// No complete line yet. This is only an error once the buffered data has
// reached the configured header size limit; otherwise wait for more input.
287  if (buf_.length() >= Config.maxRequestHeaderSize) {
288  /* who should we blame for our failure to parse this line? */
289 
290  Tokenizer methodTok(buf_);
291  if (!parseMethodField(methodTok))
292  return -1; // blame a bad method (or its delimiter)
293 
294  // assume it is the URI
295  debugs(74, ErrorLevel(), "ERROR: invalid request-line: URI exceeds " <<
296  Config.maxRequestHeaderSize << "-byte limit");
297  parseStatusCode = Http::scUriTooLong;
298  return -1;
299  }
300  debugs(74, 5, "Parser needs more data");
301  return 0;
302  }
303 
// From here on only the extracted line (without its LF) is tokenized: the
// method is taken from the front, then CRs, version and delimiter from the
// back, so that whatever is left in the middle is the URI.
304  Tokenizer tok(line);
305 
306  if (!parseMethodField(tok))
307  return -1;
308 
309  /* now parse backwards, to leave just the URI */
310  if (!skipTrailingCrs(tok))
311  return -1;
312 
313  if (!parseHttpVersionField(tok))
314  return -1;
315 
// The delimiter before the version is not checked when http0() is true.
316  if (!http0() && !skipDelimiter(tok.skipAllTrailing(DelimiterCharacters()), "before protocol version"))
317  return -1;
318 
319  /* parsed everything before and after the URI */
320 
321  if (!parseUriField(tok))
322  return -1;
323 
// Anything parseUriField() left behind was not an accepted URI character.
324  if (!tok.atEnd()) {
325  debugs(33, ErrorLevel(), "ERROR: invalid request-line: garbage after URI");
326  parseStatusCode = Http::scBadRequest;
327  return -1;
328  }
329 
330  parseStatusCode = Http::scOkay;
331  buf_ = lineTok.remaining(); // incremental parse checkpoint
332  return 1;
333 }
334 
// Runs doParse() on aBuf and returns its result. When preserveParsed_ is set,
// the bytes consumed by this call (the leading part of aBuf that is no longer
// in remaining()) are appended to parsed_.
// NOTE(review): the declarator line is absent from this listing; the body
// uses a buffer named aBuf, presumably parse(const SBuf &aBuf) -- confirm.
335 bool
337 {
338  const bool result = doParse(aBuf);
339  if (preserveParsed_) {
// remaining() must not be longer than the input, or the subtraction below
// would wrap around.
340  assert(aBuf.length() >= remaining().length());
341  parsed_.append(aBuf.substr(0, aBuf.length() - remaining().length())); // newly parsed bytes
342  }
343 
344  return result;
345 }
346 
347 // NOTE(review): this comment originally read "raw is not a reference because a reference might point back to our own buf_ or parsed_", but the body below uses a parameter named aBuf, not raw; confirm whether the remark is stale.
// Drives the staged request parser over aBuf: locate the request-line, parse
// it, then locate the mime header block. parsingStage_ records progress so
// that later calls resume at the stage reached earlier.
// Returns false when the buffer holds nothing but skipped empty lines, when
// the request-line is invalid (the stage becomes HTTP_PARSE_DONE), or when
// grabMimeBlock() fails; otherwise returns !needsMoreData().
// NOTE(review): the declarator line is absent from this listing; presumably
// this is doParse(const SBuf &aBuf) -- confirm.
348 bool
350 {
351  buf_ = aBuf;
352  debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}");
353 
354  // stage 1: locate the request-line
355  if (parsingStage_ == HTTP_PARSE_NONE) {
356  skipGarbageLines();
357 
358  // if we hit something before EOS treat it as a message
359  if (!buf_.isEmpty())
360  parsingStage_ = HTTP_PARSE_FIRST;
361  else
362  return false;
363  }
364 
365  // stage 2: parse the request-line
366  if (parsingStage_ == HTTP_PARSE_FIRST) {
// retcode: positive = line parsed, 0 = more data needed, negative = error.
367  const int retcode = parseRequestFirstLine();
368 
369  // first-line (or a look-alike) found successfully.
370  if (retcode > 0) {
371  parsingStage_ = HTTP_PARSE_MIME;
372  }
373 
374  debugs(74, 5, "request-line: retval " << retcode << ": line={" << aBuf.length() << ", data='" << aBuf << "'}");
375  debugs(74, 5, "request-line: method: " << method_);
376  debugs(74, 5, "request-line: url: " << uri_);
377  debugs(74, 5, "request-line: proto: " << msgProtocol_);
378  debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf_.length()));
379 
380  // syntax errors already
381  if (retcode < 0) {
382  parsingStage_ = HTTP_PARSE_DONE;
383  return false;
384  }
385  }
386 
387  // stage 3: locate the mime header block
388  if (parsingStage_ == HTTP_PARSE_MIME) {
389  // HTTP/1.x request-line is valid and parsing completed.
390  if (!grabMimeBlock("Request", Config.maxRequestHeaderSize)) {
// Replace the generic "header too large" status with the request-specific
// status code.
391  if (parseStatusCode == Http::scHeaderTooLarge)
392  parseStatusCode = Http::scRequestHeaderFieldsTooLarge;
393  return false;
394  }
395  }
396 
397  return !needsMoreData();
398 }
399 
static const CharacterSet & RequestTargetCharacters()
characters which Squid will accept in the HTTP request-target (URI)
int relaxed_header_parser
Definition: SquidConfig.h:315
@ scBadRequest
Definition: StatusCode.h:45
bool skipDelimiter(const size_t count, const char *where)
bool parseMethodField(Tokenizer &)
@ HTTP_PARSE_MIME
HTTP/1 mime-header block.
Definition: Parser.h:28
Definition: SBuf.h:93
CharacterSet complement(const char *complementLabel=nullptr) const
Definition: CharacterSet.cc:74
bool parseUriField(Tokenizer &)
struct SquidConfig::@97 onoff
SBuf uri_
raw copy of the original client request-line URI field
Definition: RequestParser.h:75
const SBuf & image() const
static const CharacterSet LF
Definition: CharacterSet.h:92
SBuf substr(size_type pos, size_type n=npos) const
Definition: SBuf.cc:576
Http1::Parser::size_type firstLineSize() const override
size in bytes of the first line including CRLF terminator
#define DBG_DATA
Definition: Stream.h:40
SBuf::size_type size_type
Definition: Parser.h:43
static const CharacterSet CR
Definition: CharacterSet.h:80
const char * rawContent() const
Definition: SBuf.cc:509
HttpRequestMethod method_
what request method has been found on the first line
Definition: RequestParser.h:72
bool skipTrailingCrs(Tokenizer &tok)
Parse CRs at the end of request-line, just before the terminating LF.
static const CharacterSet TCHAR
Definition: CharacterSet.h:105
static const CharacterSet HEXDIG
Definition: CharacterSet.h:88
bool doParse(const SBuf &aBuf)
called from parse() to do the parsing
@ HTTP_PARSE_NONE
initialized, but nothing usefully parsed yet
Definition: Parser.h:23
@ scRequestHeaderFieldsTooLarge
Definition: StatusCode.h:71
#define assert(EX)
Definition: assert.h:17
@ scUriTooLong
Definition: StatusCode.h:59
static const CharacterSet & UriValidCharacters()
the characters which truly are valid within URI
static const CharacterSet DIGIT
Definition: CharacterSet.h:84
size_type length() const
Returns the number of bytes stored in SBuf.
Definition: SBuf.h:419
size_t maxRequestHeaderSize
Definition: SquidConfig.h:134
@ HTTP_PARSE_FIRST
HTTP/1 message first-line.
Definition: Parser.h:24
bool parse(const SBuf &aBuf) override
@ HTTP_PARSE_DONE
parsed a message header, or reached a terminal syntax error
Definition: Parser.h:29
Definition: parse.c:160
::Parser::Tokenizer Tokenizer
Definition: Parser.h:44
static const CharacterSet & RFC3986_UNRESERVED()
allowed URI characters that do not have a reserved purpose, RFC 3986
#define DBG_IMPORTANT
Definition: Stream.h:38
optimized set of C chars, with quick membership test and merge support
Definition: CharacterSet.h:17
int ErrorLevel()
the right debugs() level for logging HTTP violation messages
Definition: Parser.cc:269
@ scOkay
Definition: StatusCode.h:27
bool parseHttpVersionField(Tokenizer &)
@ scHeaderTooLarge
Header too large to process.
Definition: StatusCode.h:89
@ METHOD_GET
Definition: MethodType.h:25
#define debugs(SECTION, LEVEL, CONTENT)
Definition: Stream.h:192
class SquidConfig Config
Definition: SquidConfig.cc:12
AnyP::ProtocolVersion ProtocolVersion(unsigned int aMajor, unsigned int aMinor)
HTTP version label information.

 

Introduction

Documentation

Support

Miscellaneous