squid : Optimising Web Delivery

Go to the documentation of this file.

 /*
  * Copyright (C) 1996-2025 The Squid Software Foundation and contributors
  *
  * Squid software is distributed under GPLv2+ license and includes
  * contributions from numerous individuals and organizations.
  * Please see the COPYING and CONTRIBUTORS files for details.
  */
  
 #include "squid.h"
 #include "base/CharacterSet.h"
 #include "debug/Stream.h"
 #include "http/one/Parser.h"
 #include "mime_header.h"
 #include "parser/Tokenizer.h"
 #include "SquidConfig.h"
  
 /// RFC 7230 section 2.6 - the 7 magic octets ("HTTP/1.") that prefix an
 /// HTTP/1.x version label
 const SBuf Http::One::Parser::Http1magic("HTTP/1.");
  
 const SBuf &Http::One::CrLf()
 {
     static const SBuf crlf("\r\n");
     return crlf;
 }
  
 /// Resets the parsing stage, the unparsed input buffer, the detected message
 /// protocol, and the stored mime header block.
 void
 Http::One::Parser::clear()
 {
     parsingStage_ = HTTP_PARSE_NONE; // back to "nothing parsed yet"
     buf_ = nullptr; // NOTE(review): presumably leaves buf_ empty -- confirm SBuf assignment from nullptr
     msgProtocol_ = AnyP::ProtocolVersion(); // default-constructed protocol version
     mimeHeaderBlock_.clear();
 }
  
 static const CharacterSet &
 RelaxedDelimiterCharacters()
 {
     // RFC 7230 section 3.5
     // tolerant parser MAY accept any of SP, HTAB, VT (%x0B), FF (%x0C),
     // or bare CR as whitespace between request-line fields
     static const CharacterSet RelaxedDels =
         (CharacterSet::SP +
          CharacterSet::HTAB +
          CharacterSet("VT,FF","\x0B\x0C") +
          CharacterSet::CR).rename("relaxed-WSP");
  
     return RelaxedDels;
 }
  
 const CharacterSet &
 Http::One::Parser::WhitespaceCharacters()
 {
     return Config.onoff.relaxed_header_parser ?
            RelaxedDelimiterCharacters() : CharacterSet::WSP;
 }
  
 const CharacterSet &
 Http::One::Parser::DelimiterCharacters()
 {
     return Config.onoff.relaxed_header_parser ?
            RelaxedDelimiterCharacters() : CharacterSet::SP;
 }
  
 void
 Http::One::Parser::skipLineTerminator(Tokenizer &tok) const
 {
     if (Config.onoff.relaxed_header_parser && tok.skipOne(CharacterSet::LF))
         return;
  
     tok.skipRequired("line-terminating CRLF", Http1::CrLf());
 }
  
 static const CharacterSet &
 LineCharacters()
 {
     static const CharacterSet line = CharacterSet::LF.complement("non-LF");
     return line;
 }
  
 void
 Http::One::Parser::cleanMimePrefix()
 {
     Tokenizer tok(mimeHeaderBlock_);
     while (tok.skipOne(RelaxedDelimiterCharacters())) {
         (void)tok.skipAll(LineCharacters()); // optional line content
         // LF terminator is required.
         // trust headersEnd() to ensure that we have at least one LF
         (void)tok.skipOne(CharacterSet::LF);
     }
  
     // If mimeHeaderBlock_ had just whitespace line(s) followed by CRLF,
     // then we skipped everything, including that terminating LF.
     // Restore the terminating CRLF if needed.
     if (tok.atEnd())
         mimeHeaderBlock_ = Http1::CrLf();
     else
         mimeHeaderBlock_ = tok.remaining();
     // now mimeHeaderBlock_ has 0+ fields followed by the LF terminator
 }
  
 void
 Http::One::Parser::unfoldMime()
 {
     Tokenizer tok(mimeHeaderBlock_);
     const auto szLimit = mimeHeaderBlock_.length();
     mimeHeaderBlock_.clear();
     // prevent the mime sender being able to make append() realloc/grow multiple times.
     mimeHeaderBlock_.reserveSpace(szLimit);
  
     static const CharacterSet nonCRLF = (CharacterSet::CR + CharacterSet::LF).complement().rename("non-CRLF");
  
     while (!tok.atEnd()) {
         const SBuf all(tok.remaining());
         const auto blobLen = tok.skipAll(nonCRLF); // may not be there
         const auto crLen = tok.skipAll(CharacterSet::CR); // may not be there
         const auto lfLen = tok.skipOne(CharacterSet::LF); // may not be there
  
         if (lfLen && tok.skipAll(CharacterSet::WSP)) { // obs-fold!
             mimeHeaderBlock_.append(all.substr(0, blobLen));
             mimeHeaderBlock_.append(' '); // replace one obs-fold with one SP
         } else
             mimeHeaderBlock_.append(all.substr(0, blobLen + crLen + lfLen));
     }
 }
  
 bool
 Http::One::Parser::grabMimeBlock(const char *which, const size_t limit)
 {
     // MIME headers block exist in (only) HTTP/1.x and ICY
     const bool expectMime = (msgProtocol_.protocol == AnyP::PROTO_HTTP && msgProtocol_.major == 1) ||
                             msgProtocol_.protocol == AnyP::PROTO_ICY ||
                             hackExpectsMime_;
  
     if (expectMime) {
         /* NOTE: HTTP/0.9 messages do not have a mime header block.
          *       So the rest of the code will need to deal with '0'-byte headers
          *       (ie, none, so don't try parsing em)
          */
         bool containsObsFold;
         if (SBuf::size_type mimeHeaderBytes = headersEnd(buf_, containsObsFold)) {
  
             // Squid could handle these headers, but admin does not want to
             if (firstLineSize() + mimeHeaderBytes >= limit) {
                 debugs(33, 5, "Too large " << which);
                 parseStatusCode = Http::scHeaderTooLarge;
                 buf_.consume(mimeHeaderBytes);
                 parsingStage_ = HTTP_PARSE_DONE;
                 return false;
             }
  
             mimeHeaderBlock_ = buf_.consume(mimeHeaderBytes);
             cleanMimePrefix();
             if (containsObsFold)
                 unfoldMime();
  
             debugs(74, 5, "mime header (0-" << mimeHeaderBytes << ") {" << mimeHeaderBlock_ << "}");
  
         } else { // headersEnd() == 0
             if (buf_.length()+firstLineSize() >= limit) {
                 debugs(33, 5, "Too large " << which);
                 parseStatusCode = Http::scHeaderTooLarge;
                 parsingStage_ = HTTP_PARSE_DONE;
             } else
                 debugs(33, 5, "Incomplete " << which << ", waiting for end of headers");
             return false;
         }
  
     } else
         debugs(33, 3, "Missing HTTP/1.x identifier");
  
     // NP: we do not do any further stages here yet so go straight to DONE
     parsingStage_ = HTTP_PARSE_DONE;
  
     return true;
 }
  
 // arbitrary size of the char buffer that Http::One::Parser::getHostHeaderField()
 // copies the field-value into; values are chopped to GET_HDR_SZ-1 bytes
 #define GET_HDR_SZ  1024
  
 // BUG: returns only the first header line with given name,
 //      ignores multi-line headers and obs-fold headers
 char *
 Http::One::Parser::getHostHeaderField()
 {
     if (!headerBlockSize())
         return nullptr;
  
     LOCAL_ARRAY(char, header, GET_HDR_SZ);
     const char *name = "Host";
     const int namelen = strlen(name);
  
     debugs(25, 5, "looking for " << name);
  
     // while we can find more LF in the SBuf
     Tokenizer tok(mimeHeaderBlock_);
     SBuf p;
  
     while (tok.prefix(p, LineCharacters())) {
         if (!tok.skipOne(CharacterSet::LF)) // move tokenizer past the LF
             break; // error. reached invalid octet or end of buffer instead of an LF ??
  
         // header lines must start with the name (case insensitive)
         if (p.substr(0, namelen).caseCmp(name, namelen))
             continue;
  
         // then a COLON
         if (p[namelen] != ':')
             continue;
  
         // drop any trailing *CR sequence
         p.trim(Http1::CrLf(), false, true);
  
         debugs(25, 5, "checking " << p);
         p.consume(namelen + 1);
  
         // TODO: optimize SBuf::trim to take CharacterSet directly
         Tokenizer t(p);
         t.skipAll(CharacterSet::WSP);
         p = t.remaining();
  
         // prevent buffer overrun on char header[];
         p.chop(0, sizeof(header)-1);
  
         // currently only used for pre-parse Host header, ensure valid domain[:port] or ip[:port]
         static const auto hostChars = CharacterSet("host",":[].-_") + CharacterSet::ALPHA + CharacterSet::DIGIT;
         if (p.findFirstNotOf(hostChars) != SBuf::npos)
             break; // error. line contains character not accepted in Host header
  
         // return the header field-value
         SBufToCstring(header, p);
         debugs(25, 5, "returning " << header);
         return header;
     }
  
     return nullptr;
 }
  
 int
 Http::One::ErrorLevel()
 {
     return Config.onoff.relaxed_header_parser < 0 ? DBG_IMPORTANT : 5;
 }
  
 namespace Http::One {
 static void
 ParseBws_(Parser::Tokenizer &tok, const CharacterSet &bwsChars)
 {
     const auto count = tok.skipAll(bwsChars);
  
     if (tok.atEnd())
         throw InsufficientInput(); // even if count is positive
  
     if (count) {
         // Generating BWS is a MUST-level violation so warn about it as needed.
         debugs(33, ErrorLevel(), "found " << count << " BWS octets");
         // RFC 7230 says we MUST parse BWS, so we fall through even if
         // Config.onoff.relaxed_header_parser is off.
     }
     // else we successfully "parsed" an empty BWS sequence
  
     // success: no more BWS characters expected
 }
 } // namespace Http::One
  
 void
 Http::One::ParseBws(Parser::Tokenizer &tok)
 {
     ParseBws_(tok, Parser::WhitespaceCharacters());
 }
  
 void
 Http::One::ParseStrictBws(Parser::Tokenizer &tok)
 {
     ParseBws_(tok, CharacterSet::WSP);
 }
  

Http::One::Parser::DelimiterCharacters

static const CharacterSet & DelimiterCharacters()

Definition: Parser.cc:59

Http::One::Parser::cleanMimePrefix

void cleanMimePrefix()

Definition: Parser.cc:97

SquidConfig::relaxed_header_parser

int relaxed_header_parser

Definition: SquidConfig.h:315

Ftp::ProtocolVersion

AnyP::ProtocolVersion ProtocolVersion()

Protocol version to use in Http::Message structures wrapping FTP messages.

Definition: Elements.cc:24

LOCAL_ARRAY

#define LOCAL_ARRAY(type, name, size)

Definition: squid.h:62

LineCharacters

static const CharacterSet & LineCharacters()

all characters except the LF line terminator

Definition: Parser.cc:76

SBuf

Definition: SBuf.h:93

Http::One::Parser::clear

virtual void clear()=0

Definition: Parser.cc:27

Http::One::Parser::parsingStage_

ParseState parsingStage_

what stage the parser is currently up to

Definition: Parser.h:149

SBufToCstring

void SBufToCstring(char *d, const SBuf &s)

Definition: SBuf.h:756

CharacterSet::complement

CharacterSet complement(const char *complementLabel=nullptr) const

Definition: CharacterSet.cc:74

CharacterSet::LF

static const CharacterSet LF

Definition: CharacterSet.h:92

SBuf::substr

SBuf substr(size_type pos, size_type n=npos) const

Definition: SBuf.cc:576

SBuf::clear

void clear()

Definition: SBuf.cc:175

CharacterSet::ALPHA

static const CharacterSet ALPHA

Definition: CharacterSet.h:76

SBuf::chop

SBuf & chop(size_type pos, size_type n=npos)

Definition: SBuf.cc:530

CharacterSet::CR

static const CharacterSet CR

Definition: CharacterSet.h:80

Http::One::Parser::WhitespaceCharacters

static const CharacterSet & WhitespaceCharacters()

Definition: Parser.cc:52

CharacterSet::WSP

static const CharacterSet WSP

Definition: CharacterSet.h:98

CharacterSet.h

SBuf::consume

SBuf consume(size_type n=npos)

Definition: SBuf.cc:481

Http::One::ParseBws_

static void ParseBws_(Parser::Tokenizer &tok, const CharacterSet &bwsChars)

Definition: Parser.cc:277

Tokenizer.h

SBuf::size_type

MemBlob::size_type size_type

Definition: SBuf.h:96

RelaxedDelimiterCharacters

static const CharacterSet & RelaxedDelimiterCharacters()

characters HTTP permits tolerant parsers to accept as delimiters

Definition: Parser.cc:37

Http::One::Parser::skipLineTerminator

void skipLineTerminator(Tokenizer &) const

Definition: Parser.cc:66

Http::One::Parser::grabMimeBlock

bool grabMimeBlock(const char *which, const size_t limit)

Definition: Parser.cc:157

Http::One::Parser::mimeHeaderBlock_

SBuf mimeHeaderBlock_

buffer holding the mime headers (if any)

Definition: Parser.h:155

Http::One::HTTP_PARSE_NONE

@ HTTP_PARSE_NONE

initialized, but nothing usefully parsed yet

Definition: Parser.h:23

mime_header.h

Http::One::Parser::buf_

SBuf buf_

bytes remaining to be parsed

Definition: Parser.h:146

CharacterSet::HTAB

static const CharacterSet HTAB

Definition: CharacterSet.h:90

CharacterSet::DIGIT

static const CharacterSet DIGIT

Definition: CharacterSet.h:84

SBuf::trim

SBuf & trim(const SBuf &toRemove, bool atBeginning=true, bool atEnd=true)

Definition: SBuf.cc:551

Parser::Tokenizer

Definition: Tokenizer.h:29

Http::One::Parser::getHostHeaderField

char * getHostHeaderField()

Definition: Parser.cc:213

CharacterSet::rename

CharacterSet & rename(const char *label)

change name; handy in const declarations that use operators

Definition: CharacterSet.h:61

SBuf::npos

static const size_type npos

Definition: SBuf.h:100

SBuf::findFirstNotOf

size_type findFirstNotOf(const CharacterSet &set, size_type startPos=0) const

Definition: SBuf.cc:746

Http::One::ParseStrictBws

void ParseStrictBws(Parser::Tokenizer &)

Definition: Parser.cc:303

Http::One::Parser::Http1magic

static const SBuf Http1magic

RFC 7230 section 2.6 - 7 magic octets.

Definition: Parser.h:143

Http::One::Parser::unfoldMime

void unfoldMime()

Definition: Parser.cc:132

Http::One::ParseBws

void ParseBws(Parser::Tokenizer &)

Definition: Parser.cc:297

Http::One::HTTP_PARSE_DONE

@ HTTP_PARSE_DONE

parsed a message header, or reached a terminal syntax error

Definition: Parser.h:29

AnyP::PROTO_HTTP

@ PROTO_HTTP

Definition: ProtocolType.h:25

tok

Definition: parse.c:160

Stream.h

Http::One::Parser::Tokenizer

::Parser::Tokenizer Tokenizer

Definition: Parser.h:44

Http::One::Parser::msgProtocol_

AnyP::ProtocolVersion msgProtocol_

what protocol label has been found in the first line (if any)

Definition: Parser.h:152

Parser.h

DBG_IMPORTANT

#define DBG_IMPORTANT

Definition: Stream.h:38

SquidConfig.h

CharacterSet

optimized set of C chars, with quick membership test and merge support

Definition: CharacterSet.h:17

SBuf::caseCmp

int caseCmp(const SBuf &S, const size_type n) const

shorthand version for case-insensitive compare()

Definition: SBuf.h:287

Http::One::ErrorLevel

int ErrorLevel()

the right debugs() level for logging HTTP violation messages

Definition: Parser.cc:269

AnyP::PROTO_ICY

@ PROTO_ICY

Definition: ProtocolType.h:37

Http::scHeaderTooLarge

@ scHeaderTooLarge

Header too large to process.

Definition: StatusCode.h:89

squid.h

SquidConfig::onoff

struct SquidConfig::@90 onoff

headersEnd

size_t headersEnd(const char *mime, size_t l, bool &containsObsFold)

Definition: mime_header.cc:17

CharacterSet::SP

static const CharacterSet SP

Definition: CharacterSet.h:94

GET_HDR_SZ

#define GET_HDR_SZ

Definition: Parser.cc:208

debugs

#define debugs(SECTION, LEVEL, CONTENT)

Definition: Stream.h:192

Http::One

common part of ParseBws() and ParseStrictBws()

Definition: forward.h:17

crlf

const CharacterSet crlf("crlf","\r\n")

Definition: Elements.cc:12

Config

class SquidConfig Config

Definition: SquidConfig.cc:12

Http::One::CrLf

const SBuf & CrLf()

CRLF textual representation.

Definition: Parser.cc:20

squid-cache.org

Optimising Web Delivery

Introduction

Documentation

Support

Miscellaneous