ResponseParser.cc
Go to the documentation of this file.
1 /*
2  * Copyright (C) 1996-2023 The Squid Software Foundation and contributors
3  *
4  * Squid software is distributed under GPLv2+ license and includes
5  * contributions from numerous individuals and organizations.
6  * Please see the COPYING and CONTRIBUTORS files for details.
7  */
8 
9 #include "squid.h"
10 #include "base/Raw.h"
11 #include "debug/Stream.h"
13 #include "http/ProtocolVersion.h"
14 #include "parser/Tokenizer.h"
15 #include "sbuf/Stream.h"
16 #include "SquidConfig.h"
17 
19 
/// Computes the size, in bytes, of the response first line from the pieces
/// this parser has stored: the protocol magic prefix, the minor version
/// digit(s), 5 octets for "SP status SP", the reason phrase, and 2 octets
/// for the CRLF terminator.
/// \returns 0 when msgProtocol_.protocol is neither PROTO_HTTP nor PROTO_ICY
/// NOTE(review): the declarator line is missing from this listing; the symbol
/// index at the end of the file gives it as `firstLineSize() const override`.
22 {
23  Http1::Parser::size_type result = 0;
24 
    // start with the length of the magic prefix that identified the protocol
25  switch (msgProtocol_.protocol)
26  {
27  case AnyP::PROTO_HTTP:
28  result += Http1magic.length();
29  break;
30  case AnyP::PROTO_ICY:
31  result += IcyMagic.length();
32  break;
33  default: // no other protocols supported
34  return result;
35  }
36  // NP: the parser does not accept >2 DIGIT for version numbers
    // NOTE(review): this digit count is also added for PROTO_ICY, although the
    // ICY branch of the first-line parser stores no version numbers -- confirm
    // that the ICY total is meant to include it.
37  if (msgProtocol_.minor > 9)
38  result += 2;
39  else
40  result += 1;
41 
42  result += 5; /* 5 octets in: SP status SP */
43  result += reasonPhrase_.length();
44  result += 2; /* CRLF terminator */
45  return result;
46 }
47 
48 // NP: we found the protocol version and consumed it already.
49 // just need the status code and reason phrase
/// Parses the remainder of the status-line after the protocol version:
/// the status-code (via ParseResponseStatus), then an optional reason-phrase
/// and the line terminator. Results go into statusCode_ and reasonPhrase_;
/// buf_ is advanced to the unparsed remainder after each completed step.
/// \retval  1 the whole line was parsed
/// \retval  0 InsufficientInput was thrown; more bytes are needed
/// \retval -1 any other std::exception was thrown; the status-line is invalid
/// NOTE(review): the declarator line is missing from this listing; the symbol
/// index gives it as `int parseResponseStatusAndReason(Tokenizer &)` and the
/// body refers to that parameter as `tok`.
50 int
52 {
53  try {
    // completedStatus_ lets a resumed parse skip the status-code that an
    // earlier call already consumed and checkpointed
54  if (!completedStatus_) {
55  debugs(74, 9, "seek status-code in: " << tok.remaining().substr(0,10) << "...");
56  ParseResponseStatus(tok, statusCode_);
57  buf_ = tok.remaining(); // resume checkpoint
58  completedStatus_ = true;
59  }
60  // NOTE: any whitespace after the single SP is part of the reason phrase.
61 
62  /* RFC 7230 says we SHOULD ignore the reason phrase content
63  * but it has a definite valid vs invalid character set.
64  * We interpret the SHOULD as ignoring absence and syntax, but
65  * producing an error if it contains an invalid octet.
66  */
67 
68  debugs(74, 9, "seek reason-phrase in: " << tok.remaining().substr(0,50) << "...");
69  // if we got here we are still looking for reason-phrase bytes
    // NOTE(review): phraseChars is not declared in any line visible here (the
    // listing jumps from 69 to 71); confirm its definition in the real source.
71  (void)tok.prefix(reasonPhrase_, phraseChars); // optional, no error if missing
72  skipLineTerminator(tok);
73  buf_ = tok.remaining(); // resume checkpoint
74  debugs(74, DBG_DATA, Raw("leftovers", buf_.rawContent(), buf_.length()));
75  return 1;
76  } catch (const InsufficientInput &) {
    // drop any phrase bytes collected so far; buf_ keeps its last checkpoint,
    // presumably so the next call re-reads the phrase from there (TODO confirm)
77  reasonPhrase_.clear();
78  return 0; // need more to be sure we have it all
79  } catch (const std::exception &ex) {
80  debugs(74, 6, "invalid status-line: " << ex.what());
81  }
    // reached only after the generic exception handler above
82  return -1;
83 }
84 
/// Extracts the status-code and the delimiter that follows it from `tok`,
/// storing the numeric value in `code`.
/// \throws InsufficientInput when number+delimiter could not be extracted and
///         the tokenizer is at the end of its input
/// \throws TextException when the value is <= 99 or >= 600, or when the input
///         is not at its end and still does not parse
/// NOTE(review): the declarator line is missing from this listing; the symbol
/// index gives it as `static void ParseResponseStatus(Tokenizer &, StatusCode &code)`
/// and the body refers to the first parameter as `tok`.
85 void
87 {
88  int64_t statusValue;
    // int64() arguments are presumably: base 10, no sign allowed, at most 3
    // characters -- confirm against the Tokenizer::int64() declaration
89  if (tok.int64(statusValue, 10, false, 3) && tok.skipOne(Parser::DelimiterCharacters())) {
90  debugs(74, 6, "raw status-code=" << statusValue);
    // `code` is written before the range checks below, so when they throw the
    // caller's variable already holds the rejected value
91  code = static_cast<StatusCode>(statusValue); // may be invalid
92 
93  // RFC 7230 Section 3.1.2 says status-code is exactly three DIGITs
94  if (code <= 99)
95  throw TextException(ToSBuf("status-code too short: ", code), Here());
96 
97  // Codes with a non-standard first digit (a.k.a. response class) are
98  // considered semantically invalid per the following HTTP WG discussion:
99  // https://lists.w3.org/Archives/Public/ietf-http-wg/2010AprJun/0354.html
100  if (code >= 600)
101  throw TextException(ToSBuf("status-code from an invalid response class: ", code), Here());
102  } else if (tok.atEnd()) {
    // nothing left to examine: the caller may retry once more bytes arrive
103  throw InsufficientInput();
104  } else {
105  throw TextException("syntactically invalid status-code area", Here());
106  }
107 }
108 
/// Parses the beginning of buf_ as a response first line. Branches, in order:
///  - a protocol was already recorded by an earlier call: continue with the
///    status-code and reason-phrase;
///  - buf_ starts with Http1magic: read the minor version digit and delimiter,
///    then the status-code and reason-phrase;
///  - buf_ starts with IcyMagic: record PROTO_ICY, then the status-code and
///    reason-phrase;
///  - buf_ is a proper prefix of either magic: wait for more bytes;
///  - anything else: treat the input as HTTP/0.9 and fabricate a status-line
///    and mime header block.
/// \retval  1 first line parsed, or input accepted as HTTP/0.9
/// \retval  0 more data is needed
/// \retval -1 the first line is invalid
/// NOTE(review): the declarator line is missing from this listing; parse()
/// calls parseResponseFirstLine() with no arguments and an int result, which
/// is presumably this function.
124 int
126 {
127  Tokenizer tok(buf_);
128 
129  if (msgProtocol_.protocol != AnyP::PROTO_NONE) {
130  debugs(74, 6, "continue incremental parse for " << msgProtocol_);
131  debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
132  // we already found the magic, but not the full line. keep going.
133  return parseResponseStatusAndReason(tok);
134 
135  } else if (tok.skip(Http1magic)) {
136  debugs(74, 6, "found prefix magic " << Http1magic);
137  // HTTP Response status-line parse
138 
139  // magic contains major version, still need to find minor DIGIT
140  int64_t verMinor;
141  const auto &WspDelim = DelimiterCharacters();
142  if (tok.int64(verMinor, 10, false, 1) && tok.skipOne(WspDelim)) {
143  msgProtocol_.protocol = AnyP::PROTO_HTTP;
144  msgProtocol_.major = 1;
145  msgProtocol_.minor = static_cast<unsigned int>(verMinor);
146 
147  debugs(74, 6, "found version=" << msgProtocol_);
148 
149  debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
150  buf_ = tok.remaining(); // resume checkpoint
151  return parseResponseStatusAndReason(tok);
152 
153  } else if (tok.atEnd())
154  return 0; // need more to be sure we have it all
155  else
156  return -1; // invalid version or delimiter, a single SP terminator required
157 
158  } else if (tok.skip(IcyMagic)) {
159  debugs(74, 6, "found prefix magic " << IcyMagic);
160  // ICY Response status-line parse (same as HTTP/1 after the magic version)
161  msgProtocol_.protocol = AnyP::PROTO_ICY;
162  // NP: ICY has no /major.minor details
163  debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
164  buf_ = tok.remaining(); // resume checkpoint
165  return parseResponseStatusAndReason(tok);
    // buf_ is shorter than the magic but matches its beginning, so the
    // protocol cannot be decided yet; report "need more data"
166  } else if (buf_.length() < Http1magic.length() && Http1magic.startsWith(buf_)) {
167  debugs(74, 7, Raw("valid HTTP/1 prefix", buf_.rawContent(), buf_.length()));
168  return 0;
169  } else if (buf_.length() < IcyMagic.length() && IcyMagic.startsWith(buf_)) {
170  debugs(74, 7, Raw("valid ICY prefix", buf_.rawContent(), buf_.length()));
171  return 0;
172  } else {
173  debugs(74, 2, "unknown/missing prefix magic. Interpreting as HTTP/0.9");
174  // found something that looks like an HTTP/0.9 response
175  // Gateway/Transform it into HTTP/1.1
176  msgProtocol_ = Http::ProtocolVersion(1,1);
177  // XXX: probably should use version 0.9 here and upgrade on output,
178  // but the old code did 1.1 transformation now.
179  statusCode_ = Http::scOkay;
180  static const SBuf gatewayPhrase("Gatewaying");
181  reasonPhrase_ = gatewayPhrase;
182  static const SBuf fakeHttpMimeBlock("X-Transformed-From: HTTP/0.9\r\n"
183  /* Server: visible_appname_string */
184  "Mime-Version: 1.0\r\n"
185  /* Date: squid_curtime */
186  "Expires: -1\r\n\r\n");
187  mimeHeaderBlock_ = fakeHttpMimeBlock;
    // buf_ is not advanced in this branch: no received byte is consumed as
    // header (presumably all of it is message body -- TODO confirm).
    // Jumping straight to HTTP_PARSE_DONE keeps parse() from moving on to
    // the mime-header stage.
188  parsingStage_ = HTTP_PARSE_DONE;
189  return 1; // no more parsing
190  }
191 
    // every branch of the if/else chain above returns
192  // unreachable
193  assert(false);
194  return -1;
195 }
196 
/// Runs the response parser over aBuf, advancing parsingStage_ through the
/// first-line and mime-header stages as far as the available bytes allow.
/// buf_ is reset to aBuf on every call and then shortened by the stages below.
/// \returns false when aBuf is empty before anything was parsed, when the
///          first line is invalid (parseStatusCode is then set to
///          Http::scInvalidHeader), or when grabMimeBlock() fails;
///          otherwise the negation of needsMoreData()
/// NOTE(review): the declarator line is missing from this listing; the symbol
/// index gives it as `bool parse(const SBuf &aBuf) override`.
197 bool
199 {
200  buf_ = aBuf;
201  debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}");
202 
203  // stage 1: locate the status-line
204  if (parsingStage_ == HTTP_PARSE_NONE) {
205  // RFC 7230 explicitly states whether garbage whitespace is to be handled
206  // at each point of the message framing boundaries.
207  // It omits mentioning garbage prior to HTTP Responses.
208  // Therefore, if we receive anything at all treat it as Response message.
209  if (!buf_.isEmpty())
210  parsingStage_ = HTTP_PARSE_FIRST;
211  else
212  return false;
213  }
214 
215  // stage 2: parse the status-line
216  if (parsingStage_ == HTTP_PARSE_FIRST) {
217  const int retcode = parseResponseFirstLine();
218 
219  // first-line (or a look-alike) found successfully.
    // the stage check keeps an HTTP_PARSE_DONE set by the first-line parser
    // (its HTTP/0.9 branch) from being overwritten
220  if (retcode > 0 && parsingStage_ == HTTP_PARSE_FIRST)
221  parsingStage_ = HTTP_PARSE_MIME;
222  debugs(74, 5, "status-line: retval " << retcode);
223  debugs(74, 5, "status-line: proto " << msgProtocol_);
224  debugs(74, 5, "status-line: status-code " << statusCode_);
225  debugs(74, 5, "status-line: reason-phrase " << reasonPhrase_);
226  debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf_.length()));
227 
228  // syntax errors already
229  if (retcode < 0) {
230  parsingStage_ = HTTP_PARSE_DONE;
231  parseStatusCode = Http::scInvalidHeader;
232  return false;
233  }
    // retcode == 0 leaves the stage at HTTP_PARSE_FIRST; control then falls
    // through to the final return below
234  }
235 
236  // stage 3: locate the mime header block
237  if (parsingStage_ == HTTP_PARSE_MIME) {
238  if (!grabMimeBlock("Response", Config.maxReplyHeaderSize))
239  return false;
240  }
241 
242  return !needsMoreData();
243 }
244 
static const CharacterSet & DelimiterCharacters()
Definition: Parser.cc:59
#define Here()
source code location of the caller
Definition: Here.h:15
@ PROTO_NONE
Definition: ProtocolType.h:24
unsigned int minor
minor version number
@ HTTP_PARSE_MIME
HTTP/1 mime-header block.
Definition: Parser.h:28
Definition: SBuf.h:93
size_t maxReplyHeaderSize
Definition: SquidConfig.h:137
StatusCode
Definition: StatusCode.h:20
ProtocolType protocol
which protocol this version is for
bool parse(const SBuf &aBuf) override
static const CharacterSet VCHAR
Definition: CharacterSet.h:96
#define DBG_DATA
Definition: Stream.h:40
SBuf::size_type size_type
Definition: Parser.h:43
Definition: Raw.h:20
static const CharacterSet WSP
Definition: CharacterSet.h:98
static void ParseResponseStatus(Tokenizer &, StatusCode &code)
SBuf reasonPhrase_
HTTP/1 status-line reason phrase.
@ HTTP_PARSE_NONE
initialized, but nothing usefully parsed yet
Definition: Parser.h:23
#define assert(EX)
Definition: assert.h:17
size_type length() const
Returns the number of bytes stored in SBuf.
Definition: SBuf.h:419
@ HTTP_PARSE_FIRST
HTTP/1 message first-line.
Definition: Parser.h:24
int code
Definition: smb-errors.c:145
Http1::Parser::size_type firstLineSize() const override
size in bytes of the first line including CRLF terminator
static const SBuf IcyMagic
magic prefix for identifying ICY response messages
static const SBuf Http1magic
RFC 7230 section 2.6 - 7 magic octets.
Definition: Parser.h:143
@ HTTP_PARSE_DONE
parsed a message header, or reached a terminal syntax error
Definition: Parser.h:29
@ PROTO_HTTP
Definition: ProtocolType.h:25
Definition: parse.c:160
an std::runtime_error with thrower location info
Definition: TextException.h:20
::Parser::Tokenizer Tokenizer
Definition: Parser.h:44
AnyP::ProtocolVersion msgProtocol_
what protocol label has been found in the first line (if any)
Definition: Parser.h:152
SBuf ToSBuf(Args &&... args)
slowly stream-prints all arguments into a freshly allocated SBuf
Definition: Stream.h:63
@ scInvalidHeader
Squid header parsing error.
Definition: StatusCode.h:88
static const CharacterSet OBSTEXT
Definition: CharacterSet.h:111
optimized set of C chars, with quick membership test and merge support
Definition: CharacterSet.h:17
@ PROTO_ICY
Definition: ProtocolType.h:37
@ scOkay
Definition: StatusCode.h:27
#define debugs(SECTION, LEVEL, CONTENT)
Definition: Stream.h:192
int parseResponseStatusAndReason(Tokenizer &)
class SquidConfig Config
Definition: SquidConfig.cc:12
AnyP::ProtocolVersion ProtocolVersion(unsigned int aMajor, unsigned int aMinor)
HTTP version label information.

 

Introduction

Documentation

Support

Miscellaneous