escape.go 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230
  1. package sip
  2. import (
  3. "strconv"
  4. "strings"
  5. )
  6. // Copyright 2009 The Go Authors. All rights reserved.
// This is a shortened copy of the escape/unescape helpers from the net/url package.
  8. type encoding int
  9. const (
  10. EncodeUserPassword encoding = 1 + iota
  11. EncodeHost
  12. EncodeZone
  13. EncodeQueryComponent
  14. )
  15. type EscapeError string
  16. func (e EscapeError) Error() string {
  17. return "invalid URL escape " + strconv.Quote(string(e))
  18. }
  19. type InvalidHostError string
  20. func (e InvalidHostError) Error() string {
  21. return "invalid character " + strconv.Quote(string(e)) + " in host name"
  22. }
  23. // unescape unescapes a string; the mode specifies
  24. // which section of the URL string is being unescaped.
  25. func Unescape(s string, mode encoding) (string, error) {
  26. // Count %, check that they're well-formed.
  27. n := 0
  28. hasPlus := false
  29. for i := 0; i < len(s); {
  30. switch s[i] {
  31. case '%':
  32. n++
  33. if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
  34. s = s[i:]
  35. if len(s) > 3 {
  36. s = s[:3]
  37. }
  38. return "", EscapeError(s)
  39. }
  40. // Per https://tools.ietf.org/html/rfc3986#page-21
  41. // in the host component %-encoding can only be used
  42. // for non-ASCII bytes.
  43. // But https://tools.ietf.org/html/rfc6874#section-2
  44. // introduces %25 being allowed to escape a percent sign
  45. // in IPv6 scoped-address literals. Yay.
  46. if mode == EncodeHost && unhex(s[i+1]) < 8 && s[i:i+3] != "%25" {
  47. return "", EscapeError(s[i : i+3])
  48. }
  49. if mode == EncodeZone {
  50. // RFC 6874 says basically "anything goes" for zone identifiers
  51. // and that even non-ASCII can be redundantly escaped,
  52. // but it seems prudent to restrict %-escaped bytes here to those
  53. // that are valid host name bytes in their unescaped form.
  54. // That is, you can use escaping in the zone identifier but not
  55. // to introduce bytes you couldn't just write directly.
  56. // But Windows puts spaces here! Yay.
  57. v := unhex(s[i+1])<<4 | unhex(s[i+2])
  58. if s[i:i+3] != "%25" && v != ' ' && shouldEscape(v, EncodeHost) {
  59. return "", EscapeError(s[i : i+3])
  60. }
  61. }
  62. i += 3
  63. case '+':
  64. hasPlus = mode == EncodeQueryComponent
  65. i++
  66. default:
  67. if (mode == EncodeHost || mode == EncodeZone) && s[i] < 0x80 && shouldEscape(s[i], mode) {
  68. return "", InvalidHostError(s[i : i+1])
  69. }
  70. i++
  71. }
  72. }
  73. if n == 0 && !hasPlus {
  74. return s, nil
  75. }
  76. var t strings.Builder
  77. t.Grow(len(s) - 2*n)
  78. for i := 0; i < len(s); i++ {
  79. switch s[i] {
  80. case '%':
  81. t.WriteByte(unhex(s[i+1])<<4 | unhex(s[i+2]))
  82. i += 2
  83. case '+':
  84. t.WriteByte('+')
  85. default:
  86. t.WriteByte(s[i])
  87. }
  88. }
  89. return t.String(), nil
  90. }
  91. func ishex(c byte) bool {
  92. switch {
  93. case '0' <= c && c <= '9':
  94. return true
  95. case 'a' <= c && c <= 'f':
  96. return true
  97. case 'A' <= c && c <= 'F':
  98. return true
  99. }
  100. return false
  101. }
  102. func unhex(c byte) byte {
  103. switch {
  104. case '0' <= c && c <= '9':
  105. return c - '0'
  106. case 'a' <= c && c <= 'f':
  107. return c - 'a' + 10
  108. case 'A' <= c && c <= 'F':
  109. return c - 'A' + 10
  110. }
  111. return 0
  112. }
  113. // Return true if the specified character should be escaped when
  114. // appearing in a URL string, according to RFC 3986.
  115. //
  116. // Please be informed that for now shouldEscape does not check all
  117. // reserved characters correctly. See golang.org/issue/5684.
  118. func shouldEscape(c byte, mode encoding) bool {
  119. // §2.3 Unreserved characters (alphanum)
  120. if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' {
  121. return false
  122. }
  123. if mode == EncodeHost || mode == EncodeZone {
  124. // §3.2.2 Host allows
  125. // sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
  126. // as part of reg-name.
  127. // We add : because we include :port as part of host.
  128. // We add [ ] because we include [ipv6]:port as part of host.
  129. // We add < > because they're the only characters left that
  130. // we could possibly allow, and Parse will reject them if we
  131. // escape them (because hosts can't use %-encoding for
  132. // ASCII bytes).
  133. switch c {
  134. case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '[', ']', '<', '>', '"':
  135. return false
  136. }
  137. }
  138. switch c {
  139. case '-', '_', '.', '~': // §2.3 Unreserved characters (mark)
  140. return false
  141. case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved)
  142. // Different sections of the URL allow a few of
  143. // the reserved characters to appear unescaped.
  144. switch mode {
  145. case EncodeUserPassword: // §3.2.1
  146. // The RFC allows ';', ':', '&', '=', '+', '$', and ',' in
  147. // userinfo, so we must escape only '@', '/', and '?'.
  148. // The parsing of userinfo treats ':' as special so we must escape
  149. // that too.
  150. return c == '@' || c == '/' || c == '?' || c == ':'
  151. case EncodeQueryComponent: // §3.4
  152. // The RFC reserves (so we must escape) everything.
  153. return true
  154. }
  155. }
  156. // Everything else must be escaped.
  157. return true
  158. }
  159. const upperhex = "0123456789ABCDEF"
  160. func Escape(s string, mode encoding) string {
  161. spaceCount, hexCount := 0, 0
  162. for i := 0; i < len(s); i++ {
  163. c := s[i]
  164. if shouldEscape(c, mode) {
  165. if c == ' ' && mode == EncodeQueryComponent {
  166. spaceCount++
  167. } else {
  168. hexCount++
  169. }
  170. }
  171. }
  172. if spaceCount == 0 && hexCount == 0 {
  173. return s
  174. }
  175. var buf [64]byte
  176. var t []byte
  177. required := len(s) + 2*hexCount
  178. if required <= len(buf) {
  179. t = buf[:required]
  180. } else {
  181. t = make([]byte, required)
  182. }
  183. if hexCount == 0 {
  184. copy(t, s)
  185. return string(t)
  186. }
  187. j := 0
  188. for i := 0; i < len(s); i++ {
  189. switch c := s[i]; {
  190. case c == ' ' && mode == EncodeQueryComponent:
  191. t[j] = c
  192. j++
  193. case shouldEscape(c, mode):
  194. t[j] = '%'
  195. t[j+1] = upperhex[c>>4]
  196. t[j+2] = upperhex[c&15]
  197. j += 3
  198. default:
  199. t[j] = s[i]
  200. j++
  201. }
  202. }
  203. return string(t)
  204. }