package httphead

import (
	"bytes"
)

// ItemType encodes the type of a lexing token.
type ItemType int

const (
	// ItemUndef reports that the token is undefined.
	ItemUndef ItemType = iota
	// ItemToken reports that the token is an RFC2616 token.
	ItemToken
	// ItemSeparator reports that the token is an RFC2616 separator.
	ItemSeparator
	// ItemString reports that the token is an RFC2616 quoted string.
	ItemString
	// ItemComment reports that the token is an RFC2616 comment.
	ItemComment
	// ItemOctet reports that the token is an octet slice.
	ItemOctet
)

// Scanner is a scanner of RFC2616 header tokens.
// See https://tools.ietf.org/html/rfc2616#section-2
type Scanner struct {
	data []byte
	pos  int

	itemType  ItemType
	itemBytes []byte

	err bool
}

// NewScanner creates a new RFC2616 data scanner.
func NewScanner(data []byte) *Scanner {
	return &Scanner{data: data}
}
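
// The function below is an illustrative sketch only and is not part of the
// original file: it shows the typical Next/Type/Bytes loop described above,
// collecting all RFC2616 tokens from a header value. The name is made up
// for the example.
func exampleCollectTokens(header []byte) [][]byte {
	var tokens [][]byte
	s := NewScanner(header)
	for s.Next() {
		// Bytes returns a sub-slice of the input for tokens and separators.
		if s.Type() == ItemToken {
			tokens = append(tokens, s.Bytes())
		}
	}
	return tokens
}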

// Next scans for the next token. It returns true on successful scanning,
// and false on error or EOF.
func (l *Scanner) Next() bool {
	c, ok := l.nextChar()
	if !ok {
		return false
	}
	switch c {
	case '"': // quoted-string;
		return l.fetchQuotedString()
	case '(': // comment;
		return l.fetchComment()
	case '\\', ')': // unexpected chars;
		l.err = true
		return false
	default:
		return l.fetchToken()
	}
}

// FetchUntil fetches ItemOctet from the current scanner position to the
// first occurrence of c, or to the end of the underlying data.
func (l *Scanner) FetchUntil(c byte) bool {
	l.resetItem()
	if l.pos == len(l.data) {
		return false
	}
	return l.fetchOctet(c)
}

// Peek reads the byte at the current position without advancing it. On end
// of data it returns 0.
func (l *Scanner) Peek() byte {
	if l.pos == len(l.data) {
		return 0
	}
	return l.data[l.pos]
}

// Peek2 reads the first two bytes at the current position without advancing
// it. If there is not enough data, the missing bytes are returned as 0.
func (l *Scanner) Peek2() (a, b byte) {
	if l.pos == len(l.data) {
		return 0, 0
	}
	if l.pos+1 == len(l.data) {
		return l.data[l.pos], 0
	}
	return l.data[l.pos], l.data[l.pos+1]
}

// Buffered reports how many bytes are left to scan.
func (l *Scanner) Buffered() int {
	return len(l.data) - l.pos
}

// Advance moves the current position forward by n bytes. It returns true on
// a successful move; if the new position would pass the end of data, the
// position is clamped to the end and false is returned.
func (l *Scanner) Advance(n int) bool {
	l.pos += n
	if l.pos > len(l.data) {
		l.pos = len(l.data)
		return false
	}
	return true
}

// Skip skips all bytes up to and including the first occurrence of c. If c
// is not found, the position is moved to the end of data.
func (l *Scanner) Skip(c byte) {
	if l.err {
		return
	}
	// Reset scanner state.
	l.resetItem()

	if i := bytes.IndexByte(l.data[l.pos:], c); i == -1 {
		// Reached the end of data.
		l.pos = len(l.data)
	} else {
		l.pos += i + 1
	}
}

// SkipEscaped skips all bytes up to and including the first occurrence of a
// non-escaped c. If no such c is found, the position is moved to the end of
// data.
func (l *Scanner) SkipEscaped(c byte) {
	if l.err {
		return
	}
	// Reset scanner state.
	l.resetItem()

	if i := ScanUntil(l.data[l.pos:], c); i == -1 {
		// Reached the end of data.
		l.pos = len(l.data)
	} else {
		l.pos += i + 1
	}
}
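
// The helper below is an illustrative sketch, not part of the original file:
// it combines FetchUntil and Skip to split a header value into raw
// (untrimmed) comma-separated elements. The name and the ',' delimiter are
// assumptions made for the example.
func exampleSplitList(value []byte) [][]byte {
	var parts [][]byte
	s := NewScanner(value)
	for s.FetchUntil(',') {
		parts = append(parts, s.Bytes())
		// Skip consumes the delimiter itself, so the next FetchUntil starts
		// at the following element (or at the end of data).
		s.Skip(',')
	}
	return parts
}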

// Type reports the current token type.
func (l *Scanner) Type() ItemType {
	return l.itemType
}

// Bytes returns the current token bytes.
func (l *Scanner) Bytes() []byte {
	return l.itemBytes
}

// nextChar skips leading space and returns the first significant byte
// without consuming it.
func (l *Scanner) nextChar() (byte, bool) {
	// Reset scanner state.
	l.resetItem()

	if l.err {
		return 0, false
	}
	l.pos += SkipSpace(l.data[l.pos:])
	if l.pos == len(l.data) {
		return 0, false
	}
	return l.data[l.pos], true
}

func (l *Scanner) resetItem() {
	l.itemType = ItemUndef
	l.itemBytes = nil
}

// fetchOctet consumes bytes from the current position up to (but not
// including) the first occurrence of c, or to the end of data, and stores
// them as an ItemOctet.
func (l *Scanner) fetchOctet(c byte) bool {
	i := l.pos
	if j := bytes.IndexByte(l.data[l.pos:], c); j == -1 {
		// Reached the end of data.
		l.pos = len(l.data)
	} else {
		l.pos += j
	}

	l.itemType = ItemOctet
	l.itemBytes = l.data[i:l.pos]

	return true
}

// fetchToken consumes a token or separator at the current position.
func (l *Scanner) fetchToken() bool {
	n, t := ScanToken(l.data[l.pos:])
	if n == -1 {
		l.err = true
		return false
	}

	l.itemType = t
	l.itemBytes = l.data[l.pos : l.pos+n]
	l.pos += n

	return true
}

// fetchQuotedString consumes a quoted-string at the current position. It
// expects the current byte to be the opening quote and stores the string
// contents with backslash escape characters removed.
func (l *Scanner) fetchQuotedString() (ok bool) {
	l.pos++

	n := ScanUntil(l.data[l.pos:], '"')
	if n == -1 {
		l.err = true
		return false
	}

	l.itemType = ItemString
	l.itemBytes = RemoveByte(l.data[l.pos:l.pos+n], '\\')
	l.pos += n + 1

	return true
}

// fetchComment consumes a comment at the current position. It expects the
// current byte to be the opening parenthesis and stores the comment contents
// with backslash escape characters removed.
func (l *Scanner) fetchComment() (ok bool) {
	l.pos++

	n := ScanPairGreedy(l.data[l.pos:], '(', ')')
	if n == -1 {
		l.err = true
		return false
	}

	l.itemType = ItemComment
	l.itemBytes = RemoveByte(l.data[l.pos:l.pos+n], '\\')
	l.pos += n + 1

	return true
}

// ScanUntil scans for the first non-escaped occurrence of c in the given
// data. It returns the index of the matched c, or -1 if c is not found.
func ScanUntil(data []byte, c byte) (n int) {
	for {
		i := bytes.IndexByte(data[n:], c)
		if i == -1 {
			return -1
		}
		n += i
		// Stop unless the found byte is preceded by an escaping backslash.
		if n == 0 || data[n-1] != '\\' {
			break
		}
		n++
	}
	return
}
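
// Illustrative only (not part of the original file): ScanUntil skips over
// escaped occurrences of the target byte. In the data below the quote at
// index 4 is preceded by a backslash, so the function reports the index of
// the unescaped quote at index 8.
func exampleScanUntil() int {
	return ScanUntil([]byte(`foo\"bar"rest`), '"') // 8
}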

// ScanPairGreedy scans for a complete pair of opening and closing chars in a
// greedy manner. It returns the index of the closing char that balances the
// pair, or -1 if no such char is found.
// Note that the first opening char is expected to be already consumed and
// must not be present in data.
func ScanPairGreedy(data []byte, open, close byte) (n int) {
	var m int
	opened := 1
	for {
		i := bytes.IndexByte(data[n:], close)
		if i == -1 {
			return -1
		}
		n += i
		// If the found byte is not escaped then it closes one pair.
		if n == 0 || data[n-1] != '\\' {
			opened--
		}
		// Count opening chars between the last scanned position and the
		// found closing char.
		for m < n {
			j := bytes.IndexByte(data[m:n], open)
			if j == -1 {
				break
			}
			m += j + 1
			opened++
		}
		if opened == 0 {
			break
		}
		n++
		m = n
	}
	return
}
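
// Illustrative only (not part of the original file): the opening '(' of a
// comment is consumed by the caller, so for the remainder of the nested
// comment "(outer (inner))" ScanPairGreedy reports the index of the ')'
// that balances the already-consumed '('.
func exampleScanPairGreedy() int {
	return ScanPairGreedy([]byte("outer (inner))tail"), '(', ')') // 13
}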

// RemoveByte returns data without c. If c is not present in data it returns
// the same slice. Otherwise it returns a copy of data with every c removed.
func RemoveByte(data []byte, c byte) []byte {
	j := bytes.IndexByte(data, c)
	if j == -1 {
		return data
	}

	n := len(data) - 1
	// At least one occurrence of c is removed, so the result is at most
	// len(data)-1 bytes long.
	result := make([]byte, n)

	k := copy(result, data[:j])
	for i := j + 1; i < len(data); {
		j = bytes.IndexByte(data[i:], c)
		if j != -1 {
			k += copy(result[k:], data[i:i+j])
			i = i + j + 1
		} else {
			k += copy(result[k:], data[i:])
			break
		}
	}
	return result[:k]
}
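
// Illustrative only (not part of the original file): RemoveByte is what the
// quoted-string and comment fetchers use to drop backslash escapes, so the
// raw quoted-string payload below becomes `say "hi"`.
func exampleRemoveByte() []byte {
	return RemoveByte([]byte(`say \"hi\"`), '\\')
}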

// SkipSpace skips spaces and LWS-sequences from p.
// It returns the number of bytes skipped.
func SkipSpace(p []byte) (n int) {
	for len(p) > 0 {
		switch {
		case len(p) >= 3 &&
			p[0] == '\r' &&
			p[1] == '\n' &&
			OctetTypes[p[2]].IsSpace():
			p = p[3:]
			n += 3
		case OctetTypes[p[0]].IsSpace():
			p = p[1:]
			n++
		default:
			return
		}
	}
	return
}

// ScanToken scans for the next token in p. It returns the length of the
// token and its type. It does not trim p.
func ScanToken(p []byte) (n int, t ItemType) {
	if len(p) == 0 {
		return 0, ItemUndef
	}

	c := p[0]
	switch {
	case OctetTypes[c].IsSeparator():
		return 1, ItemSeparator

	case OctetTypes[c].IsToken():
		for n = 1; n < len(p); n++ {
			c := p[n]
			if !OctetTypes[c].IsToken() {
				break
			}
		}
		return n, ItemToken

	default:
		return -1, ItemUndef
	}
}
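
// Illustrative only (not part of the original file): applied to the start of
// a header value like "gzip;q=1.0", ScanToken reports the 4-byte ItemToken
// "gzip"; applied right after it, it would report the single ItemSeparator ';'.
func exampleScanToken() (n int, t ItemType) {
	return ScanToken([]byte("gzip;q=1.0"))
}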
|