TeChunkedParser.cc
Go to the documentation of this file.
1 /*
2  * Copyright (C) 1996-2023 The Squid Software Foundation and contributors
3  *
4  * Squid software is distributed under GPLv2+ license and includes
5  * contributions from numerous individuals and organizations.
6  * Please see the COPYING and CONTRIBUTORS files for details.
7  */
8 
9 #include "squid.h"
10 #include "base/TextException.h"
11 #include "debug/Stream.h"
13 #include "http/one/Tokenizer.h"
14 #include "http/ProtocolVersion.h"
15 #include "MemBuf.h"
16 #include "parser/Tokenizer.h"
17 #include "Parsing.h"
18 #include "sbuf/Stream.h"
19 #include "SquidConfig.h"
20 
// Tail of the constructor: initializes customExtensionValueParser to nullptr
// and then calls clear() to set the remaining parsing state.
// NOTE(review): the constructor signature (listing line 21) and the statement
// that followed the HTTP/1.1 comment (listing line 25) are absent from this
// listing -- confirm both against the real source file.
22  customExtensionValueParser(nullptr)
23 {
24  // chunked encoding only exists in HTTP/1.1
26 
27  clear();
28 }
29 
// Returns the parser to its pre-parsing state: the stage goes back to
// HTTP_PARSE_NONE, the working buffer is emptied, both chunk size counters are
// zeroed, and the output pointer is dropped. customExtensionValueParser is
// intentionally left as is (see the XXX note below).
// NOTE(review): the line naming this function (listing line 31) is absent from
// this listing; presumably this is clear(), which the constructor calls.
30 void
32 {
33  parsingStage_ = Http1::HTTP_PARSE_NONE;
34  buf_.clear();
35  theChunkSize = theLeftBodySize = 0;
36  theOut = nullptr;
37  // XXX: We do not reset customExtensionValueParser here. Based on the
38  // clear() API description, we must, but it makes little sense and could
39  // break method callers if they appear because some of them may forget to
40  // reset customExtensionValueParser. TODO: Remove Http1::Parser as our
41  // parent class and this unnecessary method with it.
42 }
43 
// Incremental entry point: parses as much chunked-encoded input from aBuf as
// possible, driven by parsingStage_.
//
// Returns false when aBuf is empty or when one of the stage parsers below
// returns false. Otherwise returns true only if neither needsMoreData() nor
// needsMoreSpace() holds after the loop.
// Must() throws if theOut has not been set before parsing starts; exceptions
// thrown by the stage parsers are not caught here.
// NOTE(review): the line naming this function (listing line 45) is absent from
// this listing; presumably parse(const SBuf &aBuf) -- confirm.
44 bool
46 {
47  buf_ = aBuf; // sync buffers first so calls to remaining() work properly if nothing done.
48 
49  if (buf_.isEmpty()) // nothing to do (yet)
50  return false;
51 
52  debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}");
53 
 // buf_ emptiness was already excluded above, so the effective requirement
 // here is that the output destination (theOut) is set
54  Must(!buf_.isEmpty() && theOut);
55 
 // a fresh parser starts by looking for a chunk-size
56  if (parsingStage_ == Http1::HTTP_PARSE_NONE)
57  parsingStage_ = Http1::HTTP_PARSE_CHUNK_SZ;
58 
59  Tokenizer tok(buf_);
60 
61  // loop for as many chunks as we can
62  // use do-while instead of while so that we can incrementally
63  // restart in the middle of a chunk/frame
64  do {
65 
 // The three checks run in stage order, so a successful step that advances
 // parsingStage_ falls through to the next check within the same iteration.
66  if (parsingStage_ == Http1::HTTP_PARSE_CHUNK_EXT && !parseChunkMetadataSuffix(tok))
67  return false;
68 
69  if (parsingStage_ == Http1::HTTP_PARSE_CHUNK && !parseChunkBody(tok))
70  return false;
71 
72  if (parsingStage_ == Http1::HTTP_PARSE_MIME && !grabMimeBlock("Trailers", 64*1024 /* 64KB max */))
73  return false;
74 
75  // loop for as many chunks as we can
 // (the condition itself parses the next chunk-size; the loop ends when the
 // stage is not HTTP_PARSE_CHUNK_SZ or parseChunkSize() returns false)
76  } while (parsingStage_ == Http1::HTTP_PARSE_CHUNK_SZ && parseChunkSize(tok));
77 
78  return !needsMoreData() && !needsMoreSpace();
79 }
80 
// Reports whether parsing is stalled on output capacity: true only while in the
// chunk-data stage (HTTP_PARSE_CHUNK) and theOut reports no potential space.
// Asserts that theOut is set.
// NOTE(review): the line naming this function (listing line 82) is absent from
// this listing; presumably needsMoreSpace(), which parse() calls -- confirm.
81 bool
83 {
84  assert(theOut);
85  return parsingStage_ == Http1::HTTP_PARSE_CHUNK && !theOut->hasPotentialSpace();
86 }
87 
// Parses the hexadecimal chunk-size at the current tokenizer position.
//
// On success: stores the size in both theChunkSize and theLeftBodySize,
// checkpoints buf_ at the remaining input, advances the stage to
// HTTP_PARSE_CHUNK_EXT, and returns true.
// Returns false when the tokenizer has run out of input (more data needed).
// Throws when the input starts with "0x"/"0X", when the parsed size is
// negative, or when the input is neither a valid size nor exhausted.
// NOTE(review): the line naming this function (listing line 90) is absent from
// this listing; presumably parseChunkSize(Tokenizer &tok) -- confirm.
89 bool
91 {
92  Must(theChunkSize <= 0); // Should(), really
93 
 // explicitly reject a "0x"/"0X" prefix before attempting the base-16 parse
94  static const SBuf bannedHexPrefixLower("0x");
95  static const SBuf bannedHexPrefixUpper("0X");
96  if (tok.skip(bannedHexPrefixLower) || tok.skip(bannedHexPrefixUpper))
97  throw TextException("chunk starts with 0x", Here());
98 
99  int64_t size = -1;
 // base 16; the size is accepted only if some input follows the digits
 // NOTE(review): presumably this prevents committing a size whose digits are
 // split across reads, and the third argument disables sign parsing -- confirm
 // against the Tokenizer::int64() documentation.
100  if (tok.int64(size, 16, false) && !tok.atEnd()) {
101  if (size < 0)
102  throw TexcHere("negative chunk size");
103 
104  theChunkSize = theLeftBodySize = size;
105  debugs(94,7, "found chunk: " << theChunkSize);
106  buf_ = tok.remaining(); // parse checkpoint
107  parsingStage_ = Http1::HTTP_PARSE_CHUNK_EXT;
108  return true;
109 
110  } else if (tok.atEnd()) {
111  return false; // need more data
112  }
113 
114  // else error
115  throw TexcHere("corrupted chunk size");
116  return false; // should not be reachable
117 }
118 
// Parses what follows chunk-size on the chunk metadata line: an optional list
// of chunk extensions and the terminating CRLF.
//
// On success: checkpoints buf_, moves the stage to HTTP_PARSE_CHUNK when
// theChunkSize is non-zero or to HTTP_PARSE_MIME when it is zero, and returns
// true. On InsufficientInput: rewinds tok to the last checkpoint (buf_) and
// returns false. Any other exception propagates to the caller.
// NOTE(review): the line naming this function (listing line 123) and the
// statement at listing line 130 (right after the Bug 4492 comment) are absent
// from this listing; presumably the latter is a whitespace-skipping call --
// confirm against the real source file.
122 bool
124 {
125  // Code becomes much simpler when incremental parsing functions throw on
126  // bad or insufficient input, like in the code below. TODO: Expand up.
127  try {
128  // Bug 4492: IBM_HTTP_Server sends SP after chunk-size.
129  // No ParseBws() here because it may consume CR required further below.
131 
132  parseChunkExtensions(tok); // a possibly empty chunk-ext list
133  tok.skipRequired("CRLF after [chunk-ext]", Http1::CrLf());
134  buf_ = tok.remaining();
135  parsingStage_ = theChunkSize ? Http1::HTTP_PARSE_CHUNK : Http1::HTTP_PARSE_MIME;
136  return true;
137  } catch (const InsufficientInput &) {
138  tok.reset(buf_); // backtrack to the last commit point
139  return false;
140  }
141  // other exceptions bubble up to kill message parsing
142 }
143 
// Consumes zero or more ";"-introduced chunk extensions from callerTok.
//
// Each iteration works on a private copy of callerTok, so the caller's
// tokenizer advances only after a whole extension has been parsed. When no
// ';' follows, the function returns without committing the copy, which leaves
// any whitespace skipped in that iteration unconsumed in callerTok.
// Exceptions thrown by the helpers called here are not caught.
// NOTE(review): the line naming this function (listing line 147) is absent from
// this listing; presumably parseChunkExtensions(Tokenizer &callerTok).
146 void
148 {
149  do {
150  auto tok = callerTok;
151 
152  ParseBws(tok);
153 
154  if (!tok.skip(';'))
155  return; // reached the end of extensions (if any)
156 
157  parseOneChunkExtension(tok);
158  buf_ = tok.remaining(); // got one extension
159  callerTok = tok;
160  } while (true);
161 }
162 
// Extracts an extension value with tokenOrQuotedString() and discards it,
// only writing the extension name and value to the debug log.
// NOTE(review): the line naming this function (listing line 164) is absent from
// this listing; the use of tok and extName suggests it is the
// Ignore(Tokenizer &tok, const SBuf &extName) helper -- confirm.
163 void
165 {
166  const auto ignoredValue = tokenOrQuotedString(tok);
167  debugs(94, 5, extName << " with value " << ignoredValue);
168 }
169 
// Parses a single chunk extension: a name, optionally followed by "=" and a
// value, with optional whitespace around each part.
//
// Works on a copy of callerTok. callerTok is advanced past the name as soon as
// the name is parsed, so a valueless extension leaves callerTok right after the
// name; with a value, callerTok is advanced past the value at the end.
// The value goes to customExtensionValueParser only when one is set and
// theChunkSize is zero.
// NOTE(review): the line naming this function (listing line 173) and the
// statement belonging to the final else (listing line 193) are absent from this
// listing. As printed, the else appears to govern "callerTok = tok;", which is
// presumably an artifact of the missing line -- confirm against the real source.
172 void
174 {
175  auto tok = callerTok;
176 
177  ParseBws(tok); // Bug 4492: ICAP servers send SP before chunk-ext-name
178 
179  const auto extName = tok.prefix("chunk-ext-name", CharacterSet::TCHAR);
180  callerTok = tok; // in case we determine that this is a valueless chunk-ext
181 
182  ParseBws(tok);
183 
184  if (!tok.skip('='))
185  return; // parsed a valueless chunk-ext
186 
187  ParseBws(tok);
188 
189  // optimization: the only currently supported extension needs last-chunk
190  if (!theChunkSize && customExtensionValueParser)
191  customExtensionValueParser->parse(tok, extName);
192  else
194 
195  callerTok = tok;
196 }
197 
// Moves chunk-data bytes from the input into theOut.
//
// The number of bytes copied is the smallest of: the bytes still owed for the
// current chunk (theLeftBodySize), the bytes available in the input, and the
// potential space reported by theOut. When the chunk is fully copied, the
// result of parseChunkEnd() is returned. Otherwise the function checks that
// the parser is waiting for either more data or more space and returns true.
// NOTE(review): the line naming this function (listing line 199) is absent from
// this listing; presumably parseChunkBody(Tokenizer &tok) -- confirm.
198 bool
200 {
201  if (theLeftBodySize > 0) {
202  buf_ = tok.remaining(); // sync buffers before buf_ use
203 
204  // TODO fix type mismatches and casting for these
205  const size_t availSize = min(theLeftBodySize, (uint64_t)buf_.length());
206  const size_t safeSize = min(availSize, (size_t)theOut->potentialSpaceSize());
207 
208  theOut->append(buf_.rawContent(), safeSize);
209  buf_.consume(safeSize);
210  theLeftBodySize -= safeSize;
211 
212  tok.reset(buf_); // sync buffers after consume()
213  }
214 
215  if (theLeftBodySize == 0)
216  return parseChunkEnd(tok);
217  else
218  Must(needsMoreData() || needsMoreSpace());
219 
220  return true;
221 }
222 
// Consumes the CRLF that terminates chunk-data.
//
// Requires theLeftBodySize to be zero. On success: checkpoints buf_, zeroes
// theChunkSize, returns the stage to HTTP_PARSE_CHUNK_SZ so that the next chunk
// can be parsed, and returns true. Returns false on InsufficientInput; unlike
// the chunk metadata parser, tok is not rewound here. Any other exception
// propagates to the caller.
// NOTE(review): the line naming this function (listing line 224) is absent from
// this listing; presumably parseChunkEnd(Tokenizer &tok) -- confirm.
223 bool
225 {
226  Must(theLeftBodySize == 0); // Should(), really
227 
228  try {
229  tok.skipRequired("chunk CRLF", Http1::CrLf());
230  buf_ = tok.remaining(); // parse checkpoint
231  theChunkSize = 0; // done with the current chunk
232  parsingStage_ = Http1::HTTP_PARSE_CHUNK_SZ;
233  return true;
234  }
235  catch (const InsufficientInput &) {
236  return false;
237  }
238  // other exceptions bubble up to kill message parsing
239 }
240 
#define Here()
source code location of the caller
Definition: Here.h:15
void parseOneChunkExtension(Tokenizer &)
@ HTTP_PARSE_MIME
HTTP/1 mime-header block.
Definition: Parser.h:28
Definition: SBuf.h:93
bool parseChunkMetadataSuffix(Tokenizer &)
@ HTTP_PARSE_CHUNK
HTTP/1.1 chunked encoding chunk-data.
Definition: Parser.h:27
#define TexcHere(msg)
legacy convenience macro; it is not difficult to type Here() now
Definition: TextException.h:63
void parseChunkExtensions(Tokenizer &)
#define DBG_DATA
Definition: Stream.h:40
int size
Definition: ModDevPoll.cc:69
static const CharacterSet TCHAR
Definition: CharacterSet.h:105
static void Ignore(Tokenizer &tok, const SBuf &extName)
extracts and ignores the value of a named extension
@ HTTP_PARSE_NONE
initialized, but nothing usefully parsed yet
Definition: Parser.h:23
#define assert(EX)
Definition: assert.h:17
bool parseChunkEnd(Tokenizer &tok)
SBuf tokenOrQuotedString(Parser::Tokenizer &tok, const bool http1p0=false)
Definition: Tokenizer.cc:89
size_type length() const
Returns the number of bytes stored in SBuf.
Definition: SBuf.h:419
void ParseStrictBws(Parser::Tokenizer &)
Definition: Parser.cc:303
void ParseBws(Parser::Tokenizer &)
Definition: Parser.cc:297
bool parseChunkBody(Tokenizer &tok)
Definition: parse.c:160
an std::runtime_error with thrower location info
Definition: TextException.h:20
::Parser::Tokenizer Tokenizer
Definition: Parser.h:44
@ HTTP_PARSE_CHUNK_SZ
HTTP/1.1 chunked encoding chunk-size.
Definition: Parser.h:25
AnyP::ProtocolVersion msgProtocol_
what protocol label has been found in the first line (if any)
Definition: Parser.h:152
@ HTTP_PARSE_CHUNK_EXT
HTTP/1.1 chunked encoding chunk-ext.
Definition: Parser.h:26
#define Must(condition)
Definition: TextException.h:75
bool parse(const SBuf &) override
bool parseChunkSize(Tokenizer &tok)
RFC 7230 section 4.1 chunk-size.
#define debugs(SECTION, LEVEL, CONTENT)
Definition: Stream.h:192
const A & min(A const &lhs, A const &rhs)
AnyP::ProtocolVersion ProtocolVersion(unsigned int aMajor, unsigned int aMinor)
HTTP version label information.
const SBuf & CrLf()
CRLF textual representation.
Definition: Parser.cc:20

 

Introduction

Documentation

Support

Miscellaneous