Benno Rice wrote:
> RFC-compliant object invalidation behaviour.
>   
> - Switch the default from not purging if the method is unknown to purging if
>   the method is unknown.
> - When purging URIs sourced from Location and Content-Location headers, make
>   sure the URL is absolute before a) comparing it to see if hosts match and b)
>   actually trying to find it in the store.
> 
> These changes are based on changes made to squid 2.  In particular, the
> urlAbsolute function was ported from squid 2.  I would appreciate it if people
> paid particular attention to urlAbsolute to make sure I'm not doing anything
> that would cause problems in squid 3.
> 
> 
> ------------------------------------------------------------------------
> 
> # Bazaar merge directive format 2 (Bazaar 0.90)
> # revision_id: benno_at_jeamland.net-20080828051217-kqnfk5s2huacfgn3
> # target_branch: file:///home/benno/squid-work/squid3-repo/trunk/
> # testament_sha1: e5013df20b582dbb9a4c9124644e6c521edd48ea
> # timestamp: 2008-08-28 15:14:45 +1000
> # base_revision_id: squid3_at_treenet.co.nz-20080827130136-\
> #   58a98r50hgyj5e27
> # 
> # Begin patch
> === modified file 'src/HttpRequestMethod.cc'
> --- src/HttpRequestMethod.cc	2008-06-20 04:43:01 +0000
> +++ src/HttpRequestMethod.cc	2008-08-28 05:12:17 +0000
> @@ -246,7 +246,7 @@
>          /* all others */
>          case METHOD_OTHER:
>          default:
> -            return false; // be conservative: we do not know some methods specs
> +            return true; // RFC says to purge if we don't know the method
Fair enough. But which RFC and section/subsection (maybe paragraph)?
There is a unit-test needed for this function too.
Ref: src/tests/testHttpRequestMethod.*
>  	}
>  
>      return false; // not reached
> 
> === modified file 'src/Server.cc'
> --- src/Server.cc	2008-07-22 12:33:41 +0000
> +++ src/Server.cc	2008-08-28 05:12:17 +0000
> @@ -402,11 +402,20 @@
>  
>  // purges entries that match the value of a given HTTP [response] header
>  static void
> -purgeEntriesByHeader(const char *reqUrl, HttpMsg *rep, http_hdr_type hdr)
> +purgeEntriesByHeader(const HttpRequest *req, const char *reqUrl, HttpMsg *rep, http_hdr_type hdr)
>  {
> -    if (const char *url = rep->header.getStr(hdr))
> -        if (sameUrlHosts(reqUrl, url)) // prevent purging DoS, per RFC 2616
> +    if (const char *url = rep->header.getStr(hdr)) {
> +	    const char *absUrl = urlAbsolute(req, url);
> +	    if (absUrl != NULL) {
> +	        url = absUrl;
> +	    }
> +        if (sameUrlHosts(reqUrl, url)) { // prevent purging DoS, per RFC 2616
>              purgeEntriesByUrl(url);
> +        }
> +        if (absUrl != NULL) {
> +            xfree((void *)absUrl);
> +        }
safe_free(absUrl);
> +    }
>  }
>  
>  // some HTTP methods should purge matching cache entries
> @@ -425,8 +434,8 @@
>     const char *reqUrl = urlCanonical(request);
>     debugs(88, 5, "maybe purging due to " << RequestMethodStr(request->method) << ' ' << reqUrl);
>     purgeEntriesByUrl(reqUrl);
> -   purgeEntriesByHeader(reqUrl, theFinalReply, HDR_LOCATION);
> -   purgeEntriesByHeader(reqUrl, theFinalReply, HDR_CONTENT_LOCATION);
> +   purgeEntriesByHeader(request, reqUrl, theFinalReply, HDR_LOCATION);
> +   purgeEntriesByHeader(request, reqUrl, theFinalReply, HDR_CONTENT_LOCATION);
>  }
>  
>  // called (usually by kids) when we have final (possibly adapted) reply headers
> 
> === modified file 'src/protos.h'
> --- src/protos.h	2008-07-17 12:38:06 +0000
> +++ src/protos.h	2008-08-28 05:12:17 +0000
> @@ -638,6 +638,7 @@
>  SQUIDCEXTERN void urlInitialize(void);
>  SQUIDCEXTERN HttpRequest *urlParse(const HttpRequestMethod&, char *, HttpRequest *request = NULL);
>  SQUIDCEXTERN const char *urlCanonical(HttpRequest *);
> +SQUIDCEXTERN const char *urlAbsolute(const HttpRequest *, const char *);
>  SQUIDCEXTERN char *urlRInternal(const char *host, u_short port, const char *dir, const char *name);
>  SQUIDCEXTERN char *urlInternal(const char *dir, const char *name);
>  SQUIDCEXTERN int matchDomainName(const char *host, const char *domain);
Most of the above should be in src/URL.h.
Can you at least add the new ones there. The 'CEXTERN declaration is 
fine until the whole URL area gets cleaned up.
> 
> === modified file 'src/url.cc'
> --- src/url.cc	2008-04-10 11:29:39 +0000
> +++ src/url.cc	2008-08-28 05:12:17 +0000
> @@ -532,6 +532,70 @@
>      return buf;
>  }
>  
Now we get the bits I have not learned much of, so lessons may be in order.
> +const char *
> +urlAbsolute(const HttpRequest * req, const char *relUrl)
> +{
> +    LOCAL_ARRAY(char, portbuf, 32);
> +    LOCAL_ARRAY(char, urlbuf, MAX_URL);
LOCAL_ARRAY is unfortunately not thread safe. Someone else may argue, 
but I prefer to see them dead.
> +    char *path, *last_slash;
> +
> +    if (relUrl == NULL) {
> +	return (NULL);
> +    }
> +    if (req->method.id() == METHOD_CONNECT) {
> +	return (NULL);
> +    }
> +    if (strchr(relUrl, ':') != NULL) {
> +	return (NULL);
Hmm, can relURL not contain the dynamic-part?
I'd expect those to have ':' sometimes.
> +    }
> +    if (req->protocol == PROTO_URN) {
> +	snprintf(urlbuf, MAX_URL, "urn:%s", req->urlpath.buf());
?? no hostname or anything but path on URNs?
Or is that a very badly named field?
> +    } else {
> +	portbuf[0] = '\0';
> +	if (req->port != urlDefaultPort(req->protocol)) {
> +	    snprintf(portbuf, 32, ":%d", req->port);
> +	}
> +	if (relUrl[0] == '/') {
> +	    snprintf(urlbuf, MAX_URL, "%s://%s%s%s%s%s",
> +		ProtocolStr[req->protocol],
> +		req->login,
> +		*req->login ? "@" : null_string,
> +		req->GetHost(),
> +		portbuf,
> +		relUrl
> +		);
> +	} else {
> +	    path = xstrdup(req->urlpath.buf());
Should be no need for that copy. Just construct the urlBuf incrementally 
and some ptr math when it comes to the last_slash memcpy().
That would also remove the need for two different snprintf patterns below.
> +	    last_slash = strrchr(path, '/');
> +	    if (last_slash == NULL) {
> +		snprintf(urlbuf, MAX_URL, "%s://%s%s%s%s/%s",
> +		    ProtocolStr[req->protocol],
> +		    req->login,
> +		    *req->login ? "@" : null_string,
> +		    req->GetHost(),
> +		    portbuf,
> +		    relUrl
> +		    );
> +	    } else {
> +		last_slash++;
> +		*last_slash = '\0';
> +		snprintf(urlbuf, MAX_URL, "%s://%s%s%s%s%s%s",
> +		    ProtocolStr[req->protocol],
> +		    req->login,
> +		    *req->login ? "@" : null_string,
> +		    req->GetHost(),
> +		    portbuf,
> +		    path,
> +		    relUrl
> +		    );
> +	    }
> +	    xfree(path);
> +	}
> +    }
> +
> +    return (xstrdup(urlbuf));
Hmm, another allocate+copy could be replaced by doing the last minute 
allocate on urlBuf instead of a LOCAL_ARRAY.
Semantics of xstrdup() AFAIK already are that the recipient gets a 
dynamic pointer and responsibility for its destruction.
> +}
> +
>  /*
>   * matchDomainName() compares a hostname with a domainname according
>   * to the following rules:
> 
Amos
-- Please use Squid 2.7.STABLE4 or 3.0.STABLE8Received on Thu Aug 28 2008 - 11:26:05 MDT
This archive was generated by hypermail 2.2.0 : Fri Aug 29 2008 - 12:00:06 MDT