The code powering m.abunchtell.com https://m.abunchtell.com
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

91 line
4.2 KiB

  module Twitter
    # Overrides and additions to the REGEXEN pattern table.
    #
    # REGEXEN itself, and the entries read but not assigned here
    # (:valid_url_preceding_chars, :valid_domain, :valid_port_number,
    # :valid_url_query_chars, :valid_url_query_ending_chars,
    # :validate_url_unreserved, :validate_url_pct_encoded,
    # :validate_url_sub_delims), are defined outside this file --
    # presumably by the twitter-text gem; confirm it is loaded first.
    class Regex
      # A single URL path character: anything except whitespace, angle
      # brackets, parentheses and "?".
      REGEXEN[:valid_general_url_path_chars] = /[^\p{White_Space}<>\(\)\?]/iou

      # A character allowed to END a URL path: either a character outside the
      # listed punctuation set, or a balanced-parentheses group.
      #
      # NOTE(review): this interpolates REGEXEN[:valid_url_balanced_parens]
      # BEFORE the reassignment just below runs, so it embeds whatever pattern
      # REGEXEN held on entry to this file, not the one defined here. Confirm
      # that this ordering is intentional before reordering these statements.
      REGEXEN[:valid_url_path_ending_chars] = /[^\p{White_Space}\(\)\?!\*"'「」<>;:=\,\.\$%\[\]~&\|@]|(?:#{REGEXEN[:valid_url_balanced_parens]})/iou

      # A parenthesised run of path characters, optionally containing one
      # nested parenthesised run.
      REGEXEN[:valid_url_balanced_parens] = /
        \(
          (?:
            #{REGEXEN[:valid_general_url_path_chars]}+
            |
            # allow one nested level of balanced parentheses
            (?:
              #{REGEXEN[:valid_general_url_path_chars]}*
              \(
                #{REGEXEN[:valid_general_url_path_chars]}+
              \)
              #{REGEXEN[:valid_general_url_path_chars]}*
            )
          )
        \)
      /iox

      # One URL path segment: either path characters (with optional balanced
      # parentheses) terminated by a valid ending character, or path
      # characters followed by a slash.
      REGEXEN[:valid_url_path] = /(?:
        (?:
          #{REGEXEN[:valid_general_url_path_chars]}*
          (?:#{REGEXEN[:valid_url_balanced_parens]} #{REGEXEN[:valid_general_url_path_chars]}*)*
          #{REGEXEN[:valid_url_path_ending_chars]}
        )|(?:#{REGEXEN[:valid_general_url_path_chars]}+\/)
      )/iox

      # Full URL matcher. Besides http and https, the optional protocol group
      # accepts dat, dweb, ipfs, ipns, ssb and gopher.
      REGEXEN[:valid_url] = %r{
        (                                                                                     # $1 total match
          (#{REGEXEN[:valid_url_preceding_chars]})                                            # $2 Preceding character
          (                                                                                   # $3 URL
            ((?:https?|dat|dweb|ipfs|ipns|ssb|gopher):\/\/)?                                  # $4 Protocol (optional)
            (#{REGEXEN[:valid_domain]})                                                       # $5 Domain(s)
            (?::(#{REGEXEN[:valid_port_number]}))?                                            # $6 Port number (optional)
            (/#{REGEXEN[:valid_url_path]}*)?                                                  # $7 URL Path and anchor
            (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # $8 Query String
          )
        )
      }iox

      # One character of the node (user) part of an XMPP address: an
      # unreserved character, a percent-encoded sequence, or one of
      # ! $ ( ) * + , ; =
      REGEXEN[:validate_nodeid] = /(?:
        #{REGEXEN[:validate_url_unreserved]}|
        #{REGEXEN[:validate_url_pct_encoded]}|
        [!$()*+,;=]
      )/iox

      # One character of the resource part of an XMPP address: an unreserved
      # character, a percent-encoded sequence, or a sub-delimiter.
      REGEXEN[:validate_resid] = /(?:
        #{REGEXEN[:validate_url_unreserved]}|
        #{REGEXEN[:validate_url_pct_encoded]}|
        #{REGEXEN[:validate_url_sub_delims]}
      )/iox

      # XMPP URI matcher. Capture group 3 holds the URI itself; group 2 holds
      # the character that precedes it.
      REGEXEN[:valid_xmpp_uri] = %r{
        (                                                                                     # $1 total match
          (#{REGEXEN[:valid_url_preceding_chars]})                                            # $2 Preceding character
          (                                                                                   # $3 URL
            ((?:xmpp):)                                                                       # $4 Protocol
            (//#{REGEXEN[:validate_nodeid]}+@#{REGEXEN[:valid_domain]}/)?                     # $5 Authority (optional)
            (#{REGEXEN[:validate_nodeid]}+@)?                                                 # $6 Username in path (optional)
            (#{REGEXEN[:valid_domain]})                                                       # $7 Domain in path
            (/#{REGEXEN[:validate_resid]}+)?                                                  # $8 Resource in path (optional)
            (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # $9 Query String
          )
        )
      }iox
    end

    module Extractor
      # Extracts a list of all XMPP URIs included in the Tweet <tt>text</tt>
      # along with the indices. If the <tt>text</tt> is <tt>nil</tt> or
      # contains no XMPP URIs an empty array will be returned.
      #
      # Each element of the result is a Hash with two keys:
      # <tt>:url</tt>::     the matched URI (capture group 3 of the pattern)
      # <tt>:indices</tt>:: <tt>[start, end]</tt> as reported by
      #                     <tt>char_begin(3)</tt> / <tt>char_end(3)</tt>
      #
      # <tt>options</tt> is accepted but never read by this method.
      #
      # If a block is given then it will be called for each XMPP URI, after
      # the whole text has been scanned.
      def extract_xmpp_uris_with_indices(text, options = {}) # :yields: uri, start, end
        # Coerce once so the cheap colon pre-check and the scan both operate
        # on the same String; nil becomes "" and falls out through the guard.
        text = text.to_s
        return [] unless text.include?(':')

        urls = []
        text.scan(Twitter::Regex[:valid_xmpp_uri]) do
          match = Regexp.last_match
          # char_begin / char_end are not core MatchData methods; presumably
          # they are supplied by the twitter-text gem -- confirm.
          urls << {
            url: match[3],
            indices: [match.char_begin(3), match.char_end(3)]
          }
        end

        urls.each { |url| yield url[:url], url[:indices].first, url[:indices].last } if block_given?
        urls
      end
    end
  end