Just to confirm with you.  It is the ESCAPING mechanism that I am 
wanting to correct, its not clear if this is the route your patch has 
taken.  I would like to %ru component to be escaped according to the 
same rules as Apache at apache_1.3.33/src/main/util.c:1444 
ap_escape_logitem() function.  This function escapes the following 8bit 
characters when found in the URL:
/* For logging, escape all control characters,
 * double quotes (because they delimit the request in the log file)
 * backslashes (because we use backslash for escaping)
 * and 8-bit chars with the high bit set
 */
Which work out like:
"  => \"
\  => \\
<BS> => \b (character backspace literal into C escape string, however 
\xHH would be acceptable instead)
<NL> => \n (character newline literal into C escape string, however \xHH 
would be acceptable instead)
<CR> => \r (character carrige return literal into C escape string, 
however \xHH would be acceptable instead)
<TAB> => \t (character tab literal into C escape string, however \xHH 
would be acceptable instead)
<Ctrl-V> => \v (character ctrl-v literal into C escape string, however 
\xHH would be acceptable instead)
iscntrl(c) => \xHH  (the remaining control chars)
isprint(c) => \xHH  (I'm not sure how reliably isprint(c) == (c & 0x80) ?)
Your use of the term quoting in the email and the letter being " to make 
%"ru makes me believe the new resulting output will be like:
"GET "http://62.XX.XX.109//awstats.pl\"w;wget" HTTP/1.1"
When what I really meant (inspite of my typo in the example) was:
"GET http://62.XX.XX.109//awstats.pl\"w;wget\" HTTP/1.1"
So a string that is escaped in a way that is safe to use inside a 
quotation mark delimited compound element of the log line.
Thanks for the patch, I shall test it out later today.
Henrik Nordstrom wrote:
>> What I expected to see was:
>>
>> "GET http://62.XX.XX.109//awstats.pl"w;wget" HTTP/1.1"
>>
>> into (with additional \ character) which would be what Apache does:
>>
>> "GET http://62.XX.XX.109//awstats.pl\"w;wget" HTTP/1.1"
>
>
>
> Right, the quoting selection magics currently doesn't handle this case 
> very well.. defaulting to use no quoting of the URL data.
>
> You should get the expected output if explicitly select the quoted 
> string output format for the URL field:
>
> logformat combined %>a %ui %un [%tl] "%rm %"ru HTTP/%rv" %Hs %<st 
> "%{Referer}>h" "%{User-Agent}>h" %Ss:%Sh
>
> The attached incremental patch should correct the logformat directive 
> to automatically use quoted string escaping on any format element 
> found within a quoted string (not only when the quotes is immediately 
> around the item as in the Referer and User-Agent cases), and 
> similarily for braketed items. I have also tried to make the 
> description of the format selectors perhaps a little easier to 
> understand.
>
> Regards
> Henrik
>
>------------------------------------------------------------------------
>
>Index: src/cf.data.pre
>===================================================================
>RCS file: /cvsroot/squid/squid/src/cf.data.pre,v
>retrieving revision 1.49.2.40.2.16
>diff -u -r1.49.2.40.2.16 cf.data.pre
>--- src/cf.data.pre	27 May 2005 23:49:29 -0000	1.49.2.40.2.16
>+++ src/cf.data.pre	1 Sep 2005 19:23:05 -0000
>@@ -847,17 +847,18 @@
> 	The <format specification> is a string with embedded % format codes
> 	
> 	% format codes all follow the same basic structure where all but
>-	the formatcode is optional. Output strings are automatically quoted
>+	the formatcode is optional. Output strings are automatically escaped
> 	as required according to their context and the output format
>-	modifiers are usually unneeded but can be specified if an explicit
>-	quoting format is desired.
>+	modifiers are usually not needed, but can be specified if an explicit
>+	output format is desired.
> 
> 		% ["|[|'|#] [-] [[0]width] [{argument}] formatcode
> 	
>-		"	quoted string output format
>-		[	squid log quoted format as used by log_mime_hdrs
>-		#	URL quoted output format
>-		'	No automatic quoting
>+		"	output in quoted string format
>+		[	output in squid text log format as used by log_mime_hdrs
>+		#	output in URL quoted format
>+		'	output as-is
>+
> 		-	left aligned
> 		width	field width. If starting with 0 then the
> 			output is zero padded
>Index: src/access_log.c
>===================================================================
>RCS file: /cvsroot/squid/squid/src/access_log.c,v
>retrieving revision 1.15.6.3.2.13
>diff -u -r1.15.6.3.2.13 access_log.c
>--- src/access_log.c	27 May 2005 04:34:12 -0000	1.15.6.3.2.13
>+++ src/access_log.c	1 Sep 2005 19:23:05 -0000
>@@ -718,7 +718,7 @@
>  * def is for sure null-terminated
>  */
> static int
>-accessLogGetNewLogFormatToken(logformat_token * lt, char *def, char *last)
>+accessLogGetNewLogFormatToken(logformat_token * lt, char *def, enum log_quote *quote)
> {
>     char *cur = def;
>     struct logformat_token_table_entry *lte;
>@@ -733,8 +733,26 @@
> 	xstrncpy(cp, cur, l + 1);
> 	lt->type = LFT_STRING;
> 	lt->data.string = cp;
>-	*last = cur[l - 1];
>-	cur += l;
>+	while (l > 0) {
>+	    switch(*cur) {
>+	    case '"':
>+		if (*quote == LOG_QUOTE_NONE)
>+		    *quote = LOG_QUOTE_QUOTES;
>+		else if (*quote == LOG_QUOTE_QUOTES)
>+		    *quote = LOG_QUOTE_NONE;
>+		break;
>+	    case '[':
>+	    	if (*quote == LOG_QUOTE_NONE)
>+		    *quote = LOG_QUOTE_BRAKETS;
>+		break;
>+	    case ']':
>+	    	if (*quote == LOG_QUOTE_BRAKETS)
>+		    *quote = LOG_QUOTE_NONE;
>+		break;
>+	    }
>+	    cur++;
>+	    l--;
>+	}
> 	goto done;
>     }
>     if (!*cur)
>@@ -757,6 +775,9 @@
> 	lt->quote = LOG_QUOTE_URL;
> 	cur++;
> 	break;
>+    default:
>+    	lt->quote = *quote;
>+	break;
>     }
>     if (*cur == '-') {
> 	lt->left = 1;
>@@ -793,12 +814,6 @@
> 	fatalf("Can't parse configuration token: '%s'\n",
> 	    def);
>     }
>-    if (!lt->quote) {
>-	if (*last == '"' && *cur == '"')
>-	    lt->quote = LOG_QUOTE_QUOTES;
>-	else if (*last == '[' && *cur == ']')
>-	    lt->quote = LOG_QUOTE_BRAKETS;
>-    }
>     if (*cur == ' ') {
> 	lt->space = 1;
> 	cur++;
>@@ -854,7 +869,7 @@
> {
>     char *cur, *eos;
>     logformat_token *new_lt, *last_lt;
>-    char last = '\0';
>+    enum log_quote quote = LOG_QUOTE_NONE;
> 
>     debug(46, 1) ("accessLogParseLogFormat: got definition '%s'\n", def);
> 
>@@ -865,12 +880,12 @@
>     cur = def;
>     eos = def + strlen(def);
>     *fmt = new_lt = last_lt = xmalloc(sizeof(logformat_token));
>-    cur += accessLogGetNewLogFormatToken(new_lt, cur, &last);
>+    cur += accessLogGetNewLogFormatToken(new_lt, cur, "e);
>     while (cur < eos) {
> 	new_lt = xmalloc(sizeof(logformat_token));
> 	last_lt->next = new_lt;
> 	last_lt = new_lt;
>-	cur += accessLogGetNewLogFormatToken(new_lt, cur, &last);
>+	cur += accessLogGetNewLogFormatToken(new_lt, cur, "e);
>     }
>     return 1;
> }
>  
>
-- Darryl L. Miles M: 07968 320 114Received on Fri Sep 02 2005 - 01:33:29 MDT
This archive was generated by hypermail pre-2.1.9 : Sat Oct 01 2005 - 12:00:03 MDT