Skip to content

Commit

Permalink
http: optionally extract charset parameter from content-type
Browse files Browse the repository at this point in the history
Since the previous commit, we now give a sanitized,
shortened version of the content-type header to any callers
who ask for it.

This patch adds back a way for them to cleanly access
specific parameters of the type. We could easily extract all
parameters and make them available via a string_list, but:

  1. That complicates the interface and memory management.

  2. In practice, no planned callers care about anything
     except the charset.

This patch therefore goes with the simplest thing, and we
can expand or change the interface later if it becomes
necessary.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
  • Loading branch information
Jeff King authored and Junio C Hamano committed May 27, 2014
1 parent bf197fd commit e313162
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 4 deletions.
54 changes: 50 additions & 4 deletions http.c
Original file line number Diff line number Diff line change
Expand Up @@ -906,6 +906,32 @@ static CURLcode curlinfo_strbuf(CURL *curl, CURLINFO info, struct strbuf *buf)
return ret;
}

/*
 * Check for and extract a content-type parameter. "raw"
 * should be positioned at the start of the potential
 * parameter, with any whitespace already removed.
 *
 * "name" is the name of the parameter; it is matched
 * case-insensitively and must be followed immediately by "=".
 * The value, which runs up to the next whitespace, ";" or the
 * end of the string, is appended to "out".
 *
 * Returns 0 if the parameter was found (even if its value is
 * empty), or -1 if "raw" does not start with "name=", in which
 * case "out" is left untouched.
 */
static int extract_param(const char *raw, const char *name,
			 struct strbuf *out)
{
	size_t len = strlen(name);

	if (strncasecmp(raw, name, len))
		return -1;
	raw += len;

	if (*raw != '=')
		return -1;
	raw++;

	/*
	 * Parameters are separated by ";", so stop there as well as at
	 * whitespace; otherwise "charset=utf-8;foo=bar" would yield
	 * "utf-8;foo=bar" instead of "utf-8".
	 */
	while (*raw && !isspace((unsigned char)*raw) && *raw != ';')
		strbuf_addch(out, *raw++);
	return 0;
}

/*
* Extract a normalized version of the content type, with any
* spaces suppressed, all letters lowercased, and no trailing ";"
Expand All @@ -916,11 +942,15 @@ static CURLcode curlinfo_strbuf(CURL *curl, CURLINFO info, struct strbuf *buf)
* but "text/plain" is the only reasonable output, and this keeps
* our code simple.
*
* If the "charset" argument is not NULL, store the value of any
* charset parameter there.
*
* Example:
* "TEXT/PLAIN; charset=utf-8" -> "text/plain"
* "TEXT/PLAIN; charset=utf-8" -> "text/plain", "utf-8"
* "text / plain" -> "text/plain"
*/
static void extract_content_type(struct strbuf *raw, struct strbuf *type)
static void extract_content_type(struct strbuf *raw, struct strbuf *type,
struct strbuf *charset)
{
const char *p;

Expand All @@ -929,10 +959,25 @@ static void extract_content_type(struct strbuf *raw, struct strbuf *type)
for (p = raw->buf; *p; p++) {
if (isspace(*p))
continue;
if (*p == ';')
if (*p == ';') {
p++;
break;
}
strbuf_addch(type, tolower(*p));
}

if (!charset)
return;

strbuf_reset(charset);
while (*p) {
while (isspace(*p))
p++;
if (!extract_param(p, "charset", charset))
return;
while (*p && !isspace(*p))
p++;
}
}

/* http_request() targets */
Expand Down Expand Up @@ -989,7 +1034,8 @@ static int http_request(const char *url,
if (options && options->content_type) {
struct strbuf raw = STRBUF_INIT;
curlinfo_strbuf(slot->curl, CURLINFO_CONTENT_TYPE, &raw);
extract_content_type(&raw, options->content_type);
extract_content_type(&raw, options->content_type,
options->charset);
strbuf_release(&raw);
}

Expand Down
7 changes: 7 additions & 0 deletions http.h
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,13 @@ struct http_get_options {
/* If non-NULL, returns the content-type of the response. */
struct strbuf *content_type;

/*
* If non-NULL, and content_type above is non-NULL, returns
* the charset parameter from the content-type. If none is
* present, returns an empty string.
*/
struct strbuf *charset;

/*
* If non-NULL, returns the URL we ended up at, including any
* redirects we followed.
Expand Down

0 comments on commit e313162

Please sign in to comment.