Parser.cc
Go to the documentation of this file.
1 /*
2  * Copyright (C) 1996-2023 The Squid Software Foundation and contributors
3  *
4  * Squid software is distributed under GPLv2+ license and includes
5  * contributions from numerous individuals and organizations.
6  * Please see the COPYING and CONTRIBUTORS files for details.
7  */
8 
9 #include "squid.h"
10 #include "base/CharacterSet.h"
11 #include "debug/Stream.h"
12 #include "http/one/Parser.h"
13 #include "mime_header.h"
14 #include "parser/Tokenizer.h"
15 #include "SquidConfig.h"
16 
/// RFC 7230 section 2.6 - 7 magic octets
18 const SBuf Http::One::Parser::Http1magic("HTTP/1.");
19 
/// Body of CrLf() (name taken from this listing's symbol index; the signature
/// on listing line 20 is not shown): CRLF textual representation.
/// \returns a reference to one shared SBuf holding "\r\n"
21 {
22  static const SBuf crlf("\r\n"); // function-local static: constructed once, then reused
23  return crlf;
24 }
25 
26 void
28 {
30  buf_ = nullptr;
33 }
34 
36 static const CharacterSet &
38 {
39  // RFC 7230 section 3.5
40  // tolerant parser MAY accept any of SP, HTAB, VT (%x0B), FF (%x0C),
41  // or bare CR as whitespace between request-line fields
42  static const CharacterSet RelaxedDels =
45  CharacterSet("VT,FF","\x0B\x0C") +
46  CharacterSet::CR).rename("relaxed-WSP");
47 
48  return RelaxedDels;
49 }
50 
51 const CharacterSet &
53 {
56 }
57 
58 const CharacterSet &
60 {
63 }
64 
65 void
67 {
69  return;
70 
71  tok.skipRequired("line-terminating CRLF", Http1::CrLf());
72 }
73 
/// LineCharacters() (name taken from this listing's symbol index; the signature
/// on listing line 76 is not shown): all characters except the LF line terminator.
75 static const CharacterSet &
77 {
 // complement of CharacterSet::LF, labelled "non-LF"; function-local static, so built only once
78  static const CharacterSet line = CharacterSet::LF.complement("non-LF");
79  return line;
80 }
81 
/// cleanMimePrefix() (name taken from this listing's symbol index; the signature
/// on listing line 97 is not shown).
/// Drops every leading line of mimeHeaderBlock_ that starts with one of the
/// RelaxedDelimiterCharacters(), then stores what is left back into
/// mimeHeaderBlock_ (or a bare CRLF when nothing is left).
96 void
98 {
99  Tokenizer tok(mimeHeaderBlock_);
 // each iteration consumes one whole line that begins with a delimiter character
100  while (tok.skipOne(RelaxedDelimiterCharacters())) {
101  (void)tok.skipAll(LineCharacters()); // optional line content
102  // LF terminator is required.
103  // trust headersEnd() to ensure that we have at least one LF
104  (void)tok.skipOne(CharacterSet::LF);
105  }
106 
107  // If mimeHeaderBlock_ had just whitespace line(s) followed by CRLF,
108  // then we skipped everything, including that terminating LF.
109  // Restore the terminating CRLF if needed.
110  if (tok.atEnd())
111  mimeHeaderBlock_ = Http1::CrLf();
112  else
113  mimeHeaderBlock_ = tok.remaining();
114  // now mimeHeaderBlock_ has 0+ fields followed by the LF terminator
115 }
116 
/// unfoldMime() (name taken from this listing's symbol index; the signature
/// on listing line 132 is not shown).
/// Rebuilds mimeHeaderBlock_ in place: every obs-fold (optional CRs, one LF,
/// then one or more CharacterSet::WSP characters) is replaced with a single SP;
/// all other bytes, including ordinary line terminators, are copied unchanged.
131 void
133 {
134  Tokenizer tok(mimeHeaderBlock_);
135  const auto szLimit = mimeHeaderBlock_.length();
 // the tokenizer keeps reading the original bytes; the member is rebuilt from scratch
136  mimeHeaderBlock_.clear();
137  // reserve the original size up front so that the mime sender cannot make append() realloc/grow multiple times.
138  mimeHeaderBlock_.reserveSpace(szLimit);
139 
140  static const CharacterSet nonCRLF = (CharacterSet::CR + CharacterSet::LF).complement().rename("non-CRLF");
141 
142  while (!tok.atEnd()) {
 // snapshot of the unparsed input, so the pieces skipped below can be copied by length
143  const SBuf all(tok.remaining());
144  const auto blobLen = tok.skipAll(nonCRLF); // may not be there
145  const auto crLen = tok.skipAll(CharacterSet::CR); // may not be there
146  const auto lfLen = tok.skipOne(CharacterSet::LF); // may not be there
147 
 // an LF directly followed by whitespace means the next line continues this field
148  if (lfLen && tok.skipAll(CharacterSet::WSP)) { // obs-fold!
 // copy only the content; the CR*, LF and folding whitespace are dropped
149  mimeHeaderBlock_.append(all.substr(0, blobLen));
150  mimeHeaderBlock_.append(' '); // replace one obs-fold with one SP
151  } else
 // not a fold: copy the content together with whatever CR/LF bytes followed it
152  mimeHeaderBlock_.append(all.substr(0, blobLen + crLen + lfLen));
153  }
154 }
155 
/// Moves the MIME header block (when one is expected) from buf_ into mimeHeaderBlock_.
///
/// A header block is expected for HTTP/1.x and ICY messages, or when
/// hackExpectsMime_ is set.
///
/// \param which  label for the message part; used only in debugs() output
/// \param limit  size bound: when firstLineSize() plus the header bytes (or plus
///               all buffered bytes, while the end of headers is still unknown)
///               reaches this value, parsing fails with Http::scHeaderTooLarge
/// \retval true  parsingStage_ is now HTTP_PARSE_DONE; either the headers were
///               stored in mimeHeaderBlock_ (cleaned and, if needed, unfolded)
///               or no header block was expected
/// \retval false the headers are too large (parseStatusCode and HTTP_PARSE_DONE
///               are set) or the end of headers has not arrived yet (stage unchanged)
156 bool
157 Http::One::Parser::grabMimeBlock(const char *which, const size_t limit)
158 {
159  // A MIME header block exists only in HTTP/1.x and ICY messages
160  const bool expectMime = (msgProtocol_.protocol == AnyP::PROTO_HTTP && msgProtocol_.major == 1) ||
161  msgProtocol_.protocol == AnyP::PROTO_ICY ||
162  hackExpectsMime_;
163 
164  if (expectMime) {
165  /* NOTE: HTTP/0.9 messages do not have a mime header block.
166  * So the rest of the code will need to deal with '0'-byte headers
167  * (ie, none, so don't try parsing em)
168  */
169  bool containsObsFold;
 // a non-zero result is the size of the complete header block found at the start of buf_
170  if (SBuf::size_type mimeHeaderBytes = headersEnd(buf_, containsObsFold)) {
171 
172  // Squid could handle these headers, but admin does not want to
173  if (firstLineSize() + mimeHeaderBytes >= limit) {
174  debugs(33, 5, "Too large " << which);
175  parseStatusCode = Http::scHeaderTooLarge;
 // discard the oversized header bytes instead of storing them
176  buf_.consume(mimeHeaderBytes);
177  parsingStage_ = HTTP_PARSE_DONE;
178  return false;
179  }
180 
 // take the header bytes out of buf_, then normalize them
181  mimeHeaderBlock_ = buf_.consume(mimeHeaderBytes);
182  cleanMimePrefix();
183  if (containsObsFold)
184  unfoldMime();
185 
186  debugs(74, 5, "mime header (0-" << mimeHeaderBytes << ") {" << mimeHeaderBlock_ << "}");
187 
188  } else { // headersEnd() == 0
 // no end of headers yet: give up if what is buffered already reaches the limit
189  if (buf_.length()+firstLineSize() >= limit) {
190  debugs(33, 5, "Too large " << which);
191  parseStatusCode = Http::scHeaderTooLarge;
192  parsingStage_ = HTTP_PARSE_DONE;
193  } else
194  debugs(33, 5, "Incomplete " << which << ", waiting for end of headers");
195  return false;
196  }
197 
198  } else
199  debugs(33, 3, "Missing HTTP/1.x identifier");
200 
201  // NP: we do not do any further stages here yet so go straight to DONE
202  parsingStage_ = HTTP_PARSE_DONE;
203 
204  return true;
205 }
206 
207 // arbitrary maximum length (including the terminating NUL) for header values
207 // returned by Http1Parser::getHostHeaderField(); longer values are truncated
208 #define GET_HDR_SZ 1024
209 
// getHostHeaderField() (name taken from this listing's symbol index; the
// signature on listing line 213 is not shown).
// Scans mimeHeaderBlock_ line by line for a "Host:" field and returns its value
// as a C string, or nullptr when there is no header block, no matching line,
// or the value contains a character outside the accepted host character set.
// NOTE(review): the result points into the LOCAL_ARRAY buffer `header`;
// presumably that is static storage that the next call overwrites - confirm in squid.h.
210 // BUG: returns only the first header line with given name,
211 // ignores multi-line headers and obs-fold headers
212 char *
214 {
215  if (!headerBlockSize())
216  return nullptr;
217 
218  LOCAL_ARRAY(char, header, GET_HDR_SZ);
219  const char *name = "Host";
220  const int namelen = strlen(name);
221 
222  debugs(25, 5, "looking for " << name);
223 
224  // while we can find more LF in the SBuf
225  Tokenizer tok(mimeHeaderBlock_);
226  SBuf p;
227 
 // p receives the next line without its LF terminator
228  while (tok.prefix(p, LineCharacters())) {
229  if (!tok.skipOne(CharacterSet::LF)) // move tokenizer past the LF
230  break; // error. reached invalid octet or end of buffer instead of an LF ??
231 
232  // header lines must start with the name (case insensitive)
233  if (p.substr(0, namelen).caseCmp(name, namelen))
234  continue;
235 
236  // then a COLON
 // NOTE(review): p may be exactly namelen characters long here (a line that is
 // just "Host"); confirm that SBuf::operator[] is safe for index == length().
237  if (p[namelen] != ':')
238  continue;
239 
240  // drop any trailing CR and LF characters
241  p.trim(Http1::CrLf(), false, true);
242 
243  debugs(25, 5, "checking " << p);
 // drop the field name and the colon, keeping only the field value
244  p.consume(namelen + 1);
245 
246  // TODO: optimize SBuf::trim to take CharacterSet directly
 // strip leading whitespace from the value
247  Tokenizer t(p);
248  t.skipAll(CharacterSet::WSP);
249  p = t.remaining();
250 
251  // prevent buffer overrun on char header[];
252  p.chop(0, sizeof(header)-1);
253 
254  // currently only used for pre-parse Host header, ensure valid domain[:port] or ip[:port]
255  static const auto hostChars = CharacterSet("host",":[].-_") + CharacterSet::ALPHA + CharacterSet::DIGIT;
 // an invalid value ends the whole search: later lines are not examined
256  if (p.findFirstNotOf(hostChars) != SBuf::npos)
257  break; // error. line contains character not accepted in Host header
258 
259  // return the header field-value
260  SBufToCstring(header, p);
261  debugs(25, 5, "returning " << header);
262  return header;
263  }
264 
265  return nullptr;
266 }
267 
268 int
270 {
272 }
273 
275 namespace Http::One {
/// ParseBws_(tok, bwsChars) (name and parameters taken from this listing's
/// symbol index; the signature on listing line 277 is not shown):
/// the common part of ParseBws() and ParseStrictBws().
/// Skips every leading character of tok that belongs to bwsChars and reports
/// any skipped octets via debugs() at ErrorLevel().
/// \throws InsufficientInput when no input is left in tok after the skip
276 static void
278 {
279  const auto count = tok.skipAll(bwsChars);
280 
 // with nothing after the BWS, more input is needed before parsing can succeed
281  if (tok.atEnd())
282  throw InsufficientInput(); // even if count is positive
283 
284  if (count) {
285  // Generating BWS is a MUST-level violation so warn about it as needed.
286  debugs(33, ErrorLevel(), "found " << count << " BWS octets");
287  // RFC 7230 says we MUST parse BWS, so we fall through even if
288  // Config.onoff.relaxed_header_parser is off.
289  }
290  // else we successfully "parsed" an empty BWS sequence
291 
292  // success: no more BWS characters expected
293 }
294 } // namespace Http::One
295 
296 void
298 {
300 }
301 
302 void
304 {
306 }
307 
static const CharacterSet & DelimiterCharacters()
Definition: Parser.cc:59
void cleanMimePrefix()
Definition: Parser.cc:97
int relaxed_header_parser
Definition: SquidConfig.h:315
AnyP::ProtocolVersion ProtocolVersion()
Protocol version to use in Http::Message structures wrapping FTP messages.
Definition: Elements.cc:24
#define LOCAL_ARRAY(type, name, size)
Definition: squid.h:62
static const CharacterSet & LineCharacters()
all characters except the LF line terminator
Definition: Parser.cc:76
Definition: SBuf.h:93
virtual void clear()=0
Definition: Parser.cc:27
ParseState parsingStage_
what stage the parser is currently up to
Definition: Parser.h:149
void SBufToCstring(char *d, const SBuf &s)
Definition: SBuf.h:756
CharacterSet complement(const char *complementLabel=nullptr) const
Definition: CharacterSet.cc:74
struct SquidConfig::@97 onoff
static const CharacterSet LF
Definition: CharacterSet.h:92
SBuf substr(size_type pos, size_type n=npos) const
Definition: SBuf.cc:576
void clear()
Definition: SBuf.cc:175
static const CharacterSet ALPHA
Definition: CharacterSet.h:76
SBuf & chop(size_type pos, size_type n=npos)
Definition: SBuf.cc:530
static const CharacterSet CR
Definition: CharacterSet.h:80
static const CharacterSet & WhitespaceCharacters()
Definition: Parser.cc:52
static const CharacterSet WSP
Definition: CharacterSet.h:98
SBuf consume(size_type n=npos)
Definition: SBuf.cc:481
static void ParseBws_(Parser::Tokenizer &tok, const CharacterSet &bwsChars)
Definition: Parser.cc:277
MemBlob::size_type size_type
Definition: SBuf.h:96
static const CharacterSet & RelaxedDelimiterCharacters()
characters HTTP permits tolerant parsers to accept as delimiters
Definition: Parser.cc:37
void skipLineTerminator(Tokenizer &) const
Definition: Parser.cc:66
bool grabMimeBlock(const char *which, const size_t limit)
Definition: Parser.cc:157
SBuf mimeHeaderBlock_
buffer holding the mime headers (if any)
Definition: Parser.h:155
@ HTTP_PARSE_NONE
initialized, but nothing usefully parsed yet
Definition: Parser.h:23
SBuf buf_
bytes remaining to be parsed
Definition: Parser.h:146
static const CharacterSet HTAB
Definition: CharacterSet.h:90
static const CharacterSet DIGIT
Definition: CharacterSet.h:84
SBuf & trim(const SBuf &toRemove, bool atBeginning=true, bool atEnd=true)
Definition: SBuf.cc:551
char * getHostHeaderField()
Definition: Parser.cc:213
CharacterSet & rename(const char *label)
change name; handy in const declarations that use operators
Definition: CharacterSet.h:61
static const size_type npos
Definition: SBuf.h:100
size_type findFirstNotOf(const CharacterSet &set, size_type startPos=0) const
Definition: SBuf.cc:746
void ParseStrictBws(Parser::Tokenizer &)
Definition: Parser.cc:303
static const SBuf Http1magic
RFC 7230 section 2.6 - 7 magic octets.
Definition: Parser.h:143
void unfoldMime()
Definition: Parser.cc:132
void ParseBws(Parser::Tokenizer &)
Definition: Parser.cc:297
@ HTTP_PARSE_DONE
parsed a message header, or reached a terminal syntax error
Definition: Parser.h:29
@ PROTO_HTTP
Definition: ProtocolType.h:25
Definition: parse.c:160
::Parser::Tokenizer Tokenizer
Definition: Parser.h:44
AnyP::ProtocolVersion msgProtocol_
what protocol label has been found in the first line (if any)
Definition: Parser.h:152
#define DBG_IMPORTANT
Definition: Stream.h:38
optimized set of C chars, with quick membership test and merge support
Definition: CharacterSet.h:17
int caseCmp(const SBuf &S, const size_type n) const
shorthand version for case-insensitive compare()
Definition: SBuf.h:287
int ErrorLevel()
the right debugs() level for logging HTTP violation messages
Definition: Parser.cc:269
@ PROTO_ICY
Definition: ProtocolType.h:37
@ scHeaderTooLarge
Header too large to process.
Definition: StatusCode.h:89
size_t headersEnd(const char *mime, size_t l, bool &containsObsFold)
Definition: mime_header.cc:17
static const CharacterSet SP
Definition: CharacterSet.h:94
#define GET_HDR_SZ
Definition: Parser.cc:208
#define debugs(SECTION, LEVEL, CONTENT)
Definition: Stream.h:192
common part of ParseBws() and ParseStrictBws()
Definition: forward.h:17
const CharacterSet crlf("crlf","\r\n")
Definition: Elements.cc:12
class SquidConfig Config
Definition: SquidConfig.cc:12
const SBuf & CrLf()
CRLF textual representation.
Definition: Parser.cc:20

 

Introduction

Documentation

Support

Miscellaneous