123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616 |
- /* Copyright 2000-2005 The Apache Software Foundation or its licensors, as
- * applicable.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- /*
- * apr_date.c: date parsing utility routines
- * These routines are (hopefully) platform independent.
- *
- * 27 Oct 1996 Roy Fielding
- * Extracted (with many modifications) from mod_proxy.c and
- * tested with over 50,000 randomly chosen valid date strings
- * and several hundred variations of invalid date strings.
- *
- */
- #include "apr.h"
- #include "apr_lib.h"
- #define APR_WANT_STRFUNC
- #include "apr_want.h"
- #if APR_HAVE_STDLIB_H
- #include <stdlib.h>
- #endif
- #if APR_HAVE_CTYPE_H
- #include <ctype.h>
- #endif
- #include "apr_date.h"
- /*
- * Compare a string to a mask
- * Mask characters (arbitrary maximum is 256 characters, just in case):
- * @ - uppercase letter
- * $ - lowercase letter
- * & - hex digit
- * # - digit
- * ~ - digit or space
- * * - swallow remaining characters
- * <x> - exact match for any other character
- */
- APU_DECLARE(int) apr_date_checkmask(const char *data, const char *mask)
- {
- int i;
- char d;
- for (i = 0; i < 256; i++) {
- d = data[i];
- switch (mask[i]) {
- case '\0':
- return (d == '\0');
- case '*':
- return 1;
- case '@':
- if (!apr_isupper(d))
- return 0;
- break;
- case '$':
- if (!apr_islower(d))
- return 0;
- break;
- case '#':
- if (!apr_isdigit(d))
- return 0;
- break;
- case '&':
- if (!apr_isxdigit(d))
- return 0;
- break;
- case '~':
- if ((d != ' ') && !apr_isdigit(d))
- return 0;
- break;
- default:
- if (mask[i] != d)
- return 0;
- break;
- }
- }
- return 0; /* We only get here if mask is corrupted (exceeds 256) */
- }
- /*
- * Parses an HTTP date in one of three standard forms:
- *
- * Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
- * Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
- * Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format
- *
- * and returns the apr_time_t number of microseconds since 1 Jan 1970 GMT,
- * or APR_DATE_BAD if this would be out of range or if the date is invalid.
- *
- * The restricted HTTP syntax is
- *
- * HTTP-date = rfc1123-date | rfc850-date | asctime-date
- *
- * rfc1123-date = wkday "," SP date1 SP time SP "GMT"
- * rfc850-date = weekday "," SP date2 SP time SP "GMT"
- * asctime-date = wkday SP date3 SP time SP 4DIGIT
- *
- * date1 = 2DIGIT SP month SP 4DIGIT
- * ; day month year (e.g., 02 Jun 1982)
- * date2 = 2DIGIT "-" month "-" 2DIGIT
- * ; day-month-year (e.g., 02-Jun-82)
- * date3 = month SP ( 2DIGIT | ( SP 1DIGIT ))
- * ; month day (e.g., Jun 2)
- *
- * time = 2DIGIT ":" 2DIGIT ":" 2DIGIT
- * ; 00:00:00 - 23:59:59
- *
- * wkday = "Mon" | "Tue" | "Wed"
- * | "Thu" | "Fri" | "Sat" | "Sun"
- *
- * weekday = "Monday" | "Tuesday" | "Wednesday"
- * | "Thursday" | "Friday" | "Saturday" | "Sunday"
- *
- * month = "Jan" | "Feb" | "Mar" | "Apr"
- * | "May" | "Jun" | "Jul" | "Aug"
- * | "Sep" | "Oct" | "Nov" | "Dec"
- *
- * However, for the sake of robustness (and Netscapeness), we ignore the
- * weekday and anything after the time field (including the timezone).
- *
- * This routine is intended to be very fast; 10x faster than using sscanf.
- *
- * Originally from Andrew Daviel <andrew@vancouver-webpages.com>, 29 Jul 96
- * but many changes since then.
- *
- */
- APU_DECLARE(apr_time_t) apr_date_parse_http(const char *date)
- {
- apr_time_exp_t ds;
- apr_time_t result;
- int mint, mon;
- const char *monstr, *timstr;
- static const int months[12] =
- {
- ('J' << 16) | ('a' << 8) | 'n', ('F' << 16) | ('e' << 8) | 'b',
- ('M' << 16) | ('a' << 8) | 'r', ('A' << 16) | ('p' << 8) | 'r',
- ('M' << 16) | ('a' << 8) | 'y', ('J' << 16) | ('u' << 8) | 'n',
- ('J' << 16) | ('u' << 8) | 'l', ('A' << 16) | ('u' << 8) | 'g',
- ('S' << 16) | ('e' << 8) | 'p', ('O' << 16) | ('c' << 8) | 't',
- ('N' << 16) | ('o' << 8) | 'v', ('D' << 16) | ('e' << 8) | 'c'};
- if (!date)
- return APR_DATE_BAD;
- while (*date && apr_isspace(*date)) /* Find first non-whitespace char */
- ++date;
- if (*date == '\0')
- return APR_DATE_BAD;
- if ((date = strchr(date, ' ')) == NULL) /* Find space after weekday */
- return APR_DATE_BAD;
- ++date; /* Now pointing to first char after space, which should be */
- /* start of the actual date information for all 4 formats. */
- if (apr_date_checkmask(date, "## @$$ #### ##:##:## *")) {
- /* RFC 1123 format with two days */
- ds.tm_year = ((date[7] - '0') * 10 + (date[8] - '0') - 19) * 100;
- if (ds.tm_year < 0)
- return APR_DATE_BAD;
- ds.tm_year += ((date[9] - '0') * 10) + (date[10] - '0');
- ds.tm_mday = ((date[0] - '0') * 10) + (date[1] - '0');
- monstr = date + 3;
- timstr = date + 12;
- }
- else if (apr_date_checkmask(date, "##-@$$-## ##:##:## *")) {
- /* RFC 850 format */
- ds.tm_year = ((date[7] - '0') * 10) + (date[8] - '0');
- if (ds.tm_year < 70)
- ds.tm_year += 100;
- ds.tm_mday = ((date[0] - '0') * 10) + (date[1] - '0');
- monstr = date + 3;
- timstr = date + 10;
- }
- else if (apr_date_checkmask(date, "@$$ ~# ##:##:## ####*")) {
- /* asctime format */
- ds.tm_year = ((date[16] - '0') * 10 + (date[17] - '0') - 19) * 100;
- if (ds.tm_year < 0)
- return APR_DATE_BAD;
- ds.tm_year += ((date[18] - '0') * 10) + (date[19] - '0');
- if (date[4] == ' ')
- ds.tm_mday = 0;
- else
- ds.tm_mday = (date[4] - '0') * 10;
- ds.tm_mday += (date[5] - '0');
- monstr = date;
- timstr = date + 7;
- }
- else if (apr_date_checkmask(date, "# @$$ #### ##:##:## *")) {
- /* RFC 1123 format with one day */
- ds.tm_year = ((date[6] - '0') * 10 + (date[7] - '0') - 19) * 100;
- if (ds.tm_year < 0)
- return APR_DATE_BAD;
- ds.tm_year += ((date[8] - '0') * 10) + (date[9] - '0');
- ds.tm_mday = (date[0] - '0');
- monstr = date + 2;
- timstr = date + 11;
- }
- else
- return APR_DATE_BAD;
- if (ds.tm_mday <= 0 || ds.tm_mday > 31)
- return APR_DATE_BAD;
- ds.tm_hour = ((timstr[0] - '0') * 10) + (timstr[1] - '0');
- ds.tm_min = ((timstr[3] - '0') * 10) + (timstr[4] - '0');
- ds.tm_sec = ((timstr[6] - '0') * 10) + (timstr[7] - '0');
- if ((ds.tm_hour > 23) || (ds.tm_min > 59) || (ds.tm_sec > 61))
- return APR_DATE_BAD;
- mint = (monstr[0] << 16) | (monstr[1] << 8) | monstr[2];
- for (mon = 0; mon < 12; mon++)
- if (mint == months[mon])
- break;
- if (mon == 12)
- return APR_DATE_BAD;
- if ((ds.tm_mday == 31) && (mon == 3 || mon == 5 || mon == 8 || mon == 10))
- return APR_DATE_BAD;
- /* February gets special check for leapyear */
- if ((mon == 1) &&
- ((ds.tm_mday > 29) ||
- ((ds.tm_mday == 29)
- && ((ds.tm_year & 3)
- || (((ds.tm_year % 100) == 0)
- && (((ds.tm_year % 400) != 100)))))))
- return APR_DATE_BAD;
- ds.tm_mon = mon;
- /* ap_mplode_time uses tm_usec and tm_gmtoff fields, but they haven't
- * been set yet.
- * It should be safe to just zero out these values.
- * tm_usec is the number of microseconds into the second. HTTP only
- * cares about second granularity.
- * tm_gmtoff is the number of seconds off of GMT the time is. By
- * definition all times going through this function are in GMT, so this
- * is zero.
- */
- ds.tm_usec = 0;
- ds.tm_gmtoff = 0;
- if (apr_time_exp_get(&result, &ds) != APR_SUCCESS)
- return APR_DATE_BAD;
-
- return result;
- }
- /*
- * Parses a string resembling an RFC 822 date. This is meant to be
- * lenient in its parsing of dates. Hence, this will parse a wider
- * range of dates than apr_date_parse_http.
- *
- * The prominent mailer (or poster, if mailer is unknown) that has
- * been seen in the wild is included for the unknown formats.
- *
- * Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
- * Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
- * Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format
- * Sun, 6 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
- * Sun, 06 Nov 94 08:49:37 GMT ; RFC 822
- * Sun, 6 Nov 94 08:49:37 GMT ; RFC 822
- * Sun, 06 Nov 94 08:49 GMT ; Unknown [drtr@ast.cam.ac.uk]
- * Sun, 6 Nov 94 08:49 GMT ; Unknown [drtr@ast.cam.ac.uk]
- * Sun, 06 Nov 94 8:49:37 GMT ; Unknown [Elm 70.85]
- * Sun, 6 Nov 94 8:49:37 GMT ; Unknown [Elm 70.85]
- * Mon, 7 Jan 2002 07:21:22 GMT ; Unknown [Postfix]
- * Sun, 06-Nov-1994 08:49:37 GMT ; RFC 850 with four digit years
- *
- */
/*
 * TIMEPARSE: store a time of day into the tm_hour, tm_min and tm_sec
 * members of ds, given the six ASCII digit characters that spell it
 * (tens and units of the hour, of the minute and of the second).
 *
 * ds is an lvalue of a struct type having those three members; it is
 * evaluated three times.  Every argument is parenthesized in the
 * expansion so that expressions may be passed safely, and the body is
 * wrapped in do { } while (0) so that the macro followed by a semicolon
 * behaves as exactly one statement (usable in an unbraced if/else).
 */
#define TIMEPARSE(ds,hr10,hr1,min10,min1,sec10,sec1)                 \
    do {                                                             \
        (ds).tm_hour = (((hr10) - '0') * 10) + ((hr1) - '0');        \
        (ds).tm_min  = (((min10) - '0') * 10) + ((min1) - '0');      \
        (ds).tm_sec  = (((sec10) - '0') * 10) + ((sec1) - '0');      \
    } while (0)

/*
 * TIMEPARSE_STD: TIMEPARSE for text laid out as "HH:MM:SS" starting at
 * timstr.  timstr is evaluated several times and must not have side
 * effects.
 */
#define TIMEPARSE_STD(ds,timstr)                                     \
    TIMEPARSE(ds, (timstr)[0], (timstr)[1],                          \
                  (timstr)[3], (timstr)[4],                          \
                  (timstr)[6], (timstr)[7])
- APU_DECLARE(apr_time_t) apr_date_parse_rfc(const char *date)
- {
- apr_time_exp_t ds;
- apr_time_t result;
- int mint, mon;
- const char *monstr, *timstr, *gmtstr;
- static const int months[12] =
- {
- ('J' << 16) | ('a' << 8) | 'n', ('F' << 16) | ('e' << 8) | 'b',
- ('M' << 16) | ('a' << 8) | 'r', ('A' << 16) | ('p' << 8) | 'r',
- ('M' << 16) | ('a' << 8) | 'y', ('J' << 16) | ('u' << 8) | 'n',
- ('J' << 16) | ('u' << 8) | 'l', ('A' << 16) | ('u' << 8) | 'g',
- ('S' << 16) | ('e' << 8) | 'p', ('O' << 16) | ('c' << 8) | 't',
- ('N' << 16) | ('o' << 8) | 'v', ('D' << 16) | ('e' << 8) | 'c' };
- if (!date)
- return APR_DATE_BAD;
- /* Not all dates have text months at the beginning. */
- if (!apr_isdigit(date[0]))
- {
- while (*date && apr_isspace(*date)) /* Find first non-whitespace char */
- ++date;
- if (*date == '\0')
- return APR_DATE_BAD;
- if ((date = strchr(date, ' ')) == NULL) /* Find space after weekday */
- return APR_DATE_BAD;
- ++date; /* Now pointing to first char after space, which should be */ }
- /* start of the actual date information for all 11 formats. */
- if (apr_date_checkmask(date, "## @$$ #### ##:##:## *")) { /* RFC 1123 format */
- ds.tm_year = ((date[7] - '0') * 10 + (date[8] - '0') - 19) * 100;
- if (ds.tm_year < 0)
- return APR_DATE_BAD;
- ds.tm_year += ((date[9] - '0') * 10) + (date[10] - '0');
- ds.tm_mday = ((date[0] - '0') * 10) + (date[1] - '0');
- monstr = date + 3;
- timstr = date + 12;
- gmtstr = date + 20;
- TIMEPARSE_STD(ds, timstr);
- }
- else if (apr_date_checkmask(date, "##-@$$-## ##:##:## *")) {/* RFC 850 format */
- ds.tm_year = ((date[7] - '0') * 10) + (date[8] - '0');
- if (ds.tm_year < 70)
- ds.tm_year += 100;
- ds.tm_mday = ((date[0] - '0') * 10) + (date[1] - '0');
- monstr = date + 3;
- timstr = date + 10;
- gmtstr = date + 19;
- TIMEPARSE_STD(ds, timstr);
- }
- else if (apr_date_checkmask(date, "@$$ ~# ##:##:## ####*")) {
- /* asctime format */
- ds.tm_year = ((date[16] - '0') * 10 + (date[17] - '0') - 19) * 100;
- if (ds.tm_year < 0)
- return APR_DATE_BAD;
- ds.tm_year += ((date[18] - '0') * 10) + (date[19] - '0');
- if (date[4] == ' ')
- ds.tm_mday = 0;
- else
- ds.tm_mday = (date[4] - '0') * 10;
- ds.tm_mday += (date[5] - '0');
- monstr = date;
- timstr = date + 7;
- gmtstr = NULL;
- TIMEPARSE_STD(ds, timstr);
- }
- else if (apr_date_checkmask(date, "# @$$ #### ##:##:## *")) {
- /* RFC 1123 format*/
- ds.tm_year = ((date[6] - '0') * 10 + (date[7] - '0') - 19) * 100;
- if (ds.tm_year < 0)
- return APR_DATE_BAD;
- ds.tm_year += ((date[8] - '0') * 10) + (date[9] - '0');
- ds.tm_mday = (date[0] - '0');
- monstr = date + 2;
- timstr = date + 11;
- gmtstr = date + 20;
- TIMEPARSE_STD(ds, timstr);
- }
- else if (apr_date_checkmask(date, "## @$$ ## ##:##:## *")) {
- /* This is the old RFC 1123 date format - many many years ago, people
- * used two-digit years. Oh, how foolish. */
- ds.tm_year = ((date[7] - '0') * 10) + (date[8] - '0');
- if (ds.tm_year < 70)
- ds.tm_year += 100;
- ds.tm_mday = ((date[0] - '0') * 10) + (date[1] - '0');
- monstr = date + 3;
- timstr = date + 10;
- gmtstr = date + 19;
- TIMEPARSE_STD(ds, timstr);
- }
- else if (apr_date_checkmask(date, "# @$$ ## ##:##:## *")) {
- /* This is the old RFC 1123 date format - many many years ago, people
- * used two-digit years. Oh, how foolish. */
- ds.tm_year = ((date[6] - '0') * 10) + (date[7] - '0');
- if (ds.tm_year < 70)
- ds.tm_year += 100;
- ds.tm_mday = (date[0] - '0');
- monstr = date + 2;
- timstr = date + 9;
- gmtstr = date + 18;
- TIMEPARSE_STD(ds, timstr);
- }
- else if (apr_date_checkmask(date, "## @$$ ## ##:## *")) {
- /* Loser format. This is quite bogus. */
- ds.tm_year = ((date[7] - '0') * 10) + (date[8] - '0');
- if (ds.tm_year < 70)
- ds.tm_year += 100;
- ds.tm_mday = ((date[0] - '0') * 10) + (date[1] - '0');
- monstr = date + 3;
- timstr = date + 10;
- gmtstr = NULL;
- TIMEPARSE(ds, timstr[0],timstr[1], timstr[3],timstr[4], '0','0');
- }
- else if (apr_date_checkmask(date, "# @$$ ## ##:## *")) {
- /* Loser format. This is quite bogus. */
- ds.tm_year = ((date[6] - '0') * 10) + (date[7] - '0');
- if (ds.tm_year < 70)
- ds.tm_year += 100;
- ds.tm_mday = (date[0] - '0');
- monstr = date + 2;
- timstr = date + 9;
- gmtstr = NULL;
- TIMEPARSE(ds, timstr[0],timstr[1], timstr[3],timstr[4], '0','0');
- }
- else if (apr_date_checkmask(date, "## @$$ ## #:##:## *")) {
- /* Loser format. This is quite bogus. */
- ds.tm_year = ((date[7] - '0') * 10) + (date[8] - '0');
- if (ds.tm_year < 70)
- ds.tm_year += 100;
- ds.tm_mday = ((date[0] - '0') * 10) + (date[1] - '0');
- monstr = date + 3;
- timstr = date + 9;
- gmtstr = date + 18;
- TIMEPARSE(ds, '0',timstr[1], timstr[3],timstr[4], timstr[6],timstr[7]);
- }
- else if (apr_date_checkmask(date, "# @$$ ## #:##:## *")) {
- /* Loser format. This is quite bogus. */
- ds.tm_year = ((date[6] - '0') * 10) + (date[7] - '0');
- if (ds.tm_year < 70)
- ds.tm_year += 100;
- ds.tm_mday = (date[0] - '0');
- monstr = date + 2;
- timstr = date + 8;
- gmtstr = date + 17;
- TIMEPARSE(ds, '0',timstr[1], timstr[3],timstr[4], timstr[6],timstr[7]);
- }
- else if (apr_date_checkmask(date, " # @$$ #### ##:##:## *")) {
- /* RFC 1123 format with a space instead of a leading zero. */
- ds.tm_year = ((date[7] - '0') * 10 + (date[8] - '0') - 19) * 100;
- if (ds.tm_year < 0)
- return APR_DATE_BAD;
- ds.tm_year += ((date[9] - '0') * 10) + (date[10] - '0');
- ds.tm_mday = (date[1] - '0');
- monstr = date + 3;
- timstr = date + 12;
- gmtstr = date + 20;
- TIMEPARSE_STD(ds, timstr);
- }
- else if (apr_date_checkmask(date, "##-@$$-#### ##:##:## *")) {
- /* RFC 1123 with dashes instead of spaces between date/month/year
- * This also looks like RFC 850 with four digit years.
- */
- ds.tm_year = ((date[7] - '0') * 10 + (date[8] - '0') - 19) * 100;
- if (ds.tm_year < 0)
- return APR_DATE_BAD;
- ds.tm_year += ((date[9] - '0') * 10) + (date[10] - '0');
- ds.tm_mday = ((date[0] - '0') * 10) + (date[1] - '0');
- monstr = date + 3;
- timstr = date + 12;
- gmtstr = date + 21;
- TIMEPARSE_STD(ds, timstr);
- }
- else
- return APR_DATE_BAD;
- if (ds.tm_mday <= 0 || ds.tm_mday > 31)
- return APR_DATE_BAD;
- if ((ds.tm_hour > 23) || (ds.tm_min > 59) || (ds.tm_sec > 61))
- return APR_DATE_BAD;
- mint = (monstr[0] << 16) | (monstr[1] << 8) | monstr[2];
- for (mon = 0; mon < 12; mon++)
- if (mint == months[mon])
- break;
- if (mon == 12)
- return APR_DATE_BAD;
- if ((ds.tm_mday == 31) && (mon == 3 || mon == 5 || mon == 8 || mon == 10))
- return APR_DATE_BAD;
- /* February gets special check for leapyear */
- if ((mon == 1) &&
- ((ds.tm_mday > 29)
- || ((ds.tm_mday == 29)
- && ((ds.tm_year & 3)
- || (((ds.tm_year % 100) == 0)
- && (((ds.tm_year % 400) != 100)))))))
- return APR_DATE_BAD;
- ds.tm_mon = mon;
- /* tm_gmtoff is the number of seconds off of GMT the time is.
- *
- * We only currently support: [+-]ZZZZ where Z is the offset in
- * hours from GMT.
- *
- * If there is any confusion, tm_gmtoff will remain 0.
- */
- ds.tm_gmtoff = 0;
- if (gmtstr && *gmtstr != '\0') {
- /* Do we have a GMT? */
- if (*(++gmtstr) != '\0') {
- int offset;
- switch (*(gmtstr++)) {
- case '-':
- offset = atoi(gmtstr);
- ds.tm_gmtoff -= (offset / 100) * 60 * 60;
- ds.tm_gmtoff -= (offset % 100) * 60;
- break;
- case '+':
- offset = atoi(gmtstr);
- ds.tm_gmtoff += (offset / 100) * 60 * 60;
- ds.tm_gmtoff += (offset % 100) * 60;
- break;
- }
- }
- }
- /* apr_time_exp_get uses tm_usec field, but it hasn't been set yet.
- * It should be safe to just zero out this value.
- * tm_usec is the number of microseconds into the second. HTTP only
- * cares about second granularity.
- */
- ds.tm_usec = 0;
- if (apr_time_exp_gmt_get(&result, &ds) != APR_SUCCESS)
- return APR_DATE_BAD;
-
- return result;
- }
|