A clean, Markdown-based publishing platform made for writers. Write together, and build a community. https://writefreely.org
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 

232 lines
8.0 KiB

  1. /*
  2. * Copyright © 2018 A Bunch Tell LLC.
  3. *
  4. * This file is part of WriteFreely.
  5. *
  6. * WriteFreely is free software: you can redistribute it and/or modify
  7. * it under the terms of the GNU Affero General Public License, included
  8. * in the LICENSE file in this source code package.
  9. */
  10. package writefreely
  11. import (
  12. "bytes"
  13. "fmt"
  14. "github.com/microcosm-cc/bluemonday"
  15. stripmd "github.com/writeas/go-strip-markdown"
  16. "github.com/writeas/saturday"
  17. "github.com/writeas/web-core/stringmanip"
  18. "github.com/writeas/writefreely/parse"
  19. "html"
  20. "html/template"
  21. "regexp"
  22. "strings"
  23. "unicode"
  24. "unicode/utf8"
  25. )
  // Regular expressions used while rendering and post-processing post content.
  // They are compiled once at package scope; raw string literals keep the
  // patterns free of double escaping.
  var (
  	// blockReg matches an opening <ul>, <ol> or <blockquote> tag that is
  	// immediately followed by a newline.
  	blockReg = regexp.MustCompile(`<(ul|ol|blockquote)>\n`)
  	// endBlockReg matches any lowercase closing tag, a newline, and then a
  	// closing </ul>, </ol> or </blockquote> tag.
  	endBlockReg = regexp.MustCompile(`</([a-z]+)>\n</(ul|ol|blockquote)>`)
  	// youtubeReg captures a YouTube embed URL ($1) and its optional query
  	// string ($2). The dots in the host name are escaped so that only the
  	// literal host www.youtube.com matches, rather than any character in
  	// their place.
  	youtubeReg = regexp.MustCompile(`(https?://www\.youtube\.com/embed/[a-zA-Z0-9\-_]+)(\?[^\t\n\f\r "']+)?`)
  	// titleElementReg matches opening and closing <h1>..<h6> tags.
  	titleElementReg = regexp.MustCompile(`</?h[1-6]>`)
  	// hashtagReg matches a '#' followed by one or more Unicode letters,
  	// marks or digits, capturing the text after the '#'.
  	hashtagReg = regexp.MustCompile(`#([\p{L}\p{M}\d]+)`)
  	// markeddownReg captures the contents of a single-line <p>...</p> element.
  	markeddownReg = regexp.MustCompile(`<p>(.+)</p>`)
  )
  34. func (p *Post) formatContent(c *Collection, isOwner bool) {
  35. baseURL := c.CanonicalURL()
  36. if !isSingleUser {
  37. baseURL = "/" + c.Alias + "/"
  38. }
  39. newCon := hashtagReg.ReplaceAllFunc([]byte(p.Content), func(b []byte) []byte {
  40. // Ensure we only replace "hashtags" that have already been extracted.
  41. // `hashtagReg` catches everything, including any hash on the end of a
  42. // URL, so we rely on p.Tags as the final word on whether or not to link
  43. // a tag.
  44. for _, t := range p.Tags {
  45. if string(b) == "#"+t {
  46. return bytes.Replace(b, []byte("#"+t), []byte("<a href=\""+baseURL+"tag:"+t+"\" class=\"hashtag\"><span>#</span><span class=\"p-category\">"+t+"</span></a>"), -1)
  47. }
  48. }
  49. return b
  50. })
  51. p.HTMLTitle = template.HTML(applyBasicMarkdown([]byte(p.Title.String)))
  52. p.HTMLContent = template.HTML(applyMarkdown([]byte(newCon)))
  53. if exc := strings.Index(string(newCon), "<!--more-->"); exc > -1 {
  54. p.HTMLExcerpt = template.HTML(applyMarkdown([]byte(newCon[:exc])))
  55. }
  56. }
  57. func (p *PublicPost) formatContent(isOwner bool) {
  58. p.Post.formatContent(&p.Collection.Collection, isOwner)
  59. }
  60. func applyMarkdown(data []byte) string {
  61. return applyMarkdownSpecial(data, false)
  62. }
  63. func applyMarkdownSpecial(data []byte, skipNoFollow bool) string {
  64. mdExtensions := 0 |
  65. blackfriday.EXTENSION_TABLES |
  66. blackfriday.EXTENSION_FENCED_CODE |
  67. blackfriday.EXTENSION_AUTOLINK |
  68. blackfriday.EXTENSION_STRIKETHROUGH |
  69. blackfriday.EXTENSION_SPACE_HEADERS |
  70. blackfriday.EXTENSION_AUTO_HEADER_IDS
  71. htmlFlags := 0 |
  72. blackfriday.HTML_USE_SMARTYPANTS |
  73. blackfriday.HTML_SMARTYPANTS_DASHES
  74. // Generate Markdown
  75. md := blackfriday.Markdown([]byte(data), blackfriday.HtmlRenderer(htmlFlags, "", ""), mdExtensions)
  76. // Strip out bad HTML
  77. policy := getSanitizationPolicy()
  78. policy.RequireNoFollowOnLinks(!skipNoFollow)
  79. outHTML := string(policy.SanitizeBytes(md))
  80. // Strip newlines on certain block elements that render with them
  81. outHTML = blockReg.ReplaceAllString(outHTML, "<$1>")
  82. outHTML = endBlockReg.ReplaceAllString(outHTML, "</$1></$2>")
  83. // Remove all query parameters on YouTube embed links
  84. // TODO: make this more specific. Taking the nuclear approach here to strip ?autoplay=1
  85. outHTML = youtubeReg.ReplaceAllString(outHTML, "$1")
  86. return outHTML
  87. }
  88. func applyBasicMarkdown(data []byte) string {
  89. mdExtensions := 0 |
  90. blackfriday.EXTENSION_STRIKETHROUGH |
  91. blackfriday.EXTENSION_SPACE_HEADERS |
  92. blackfriday.EXTENSION_HEADER_IDS
  93. htmlFlags := 0 |
  94. blackfriday.HTML_SKIP_HTML |
  95. blackfriday.HTML_USE_SMARTYPANTS |
  96. blackfriday.HTML_SMARTYPANTS_DASHES
  97. // Generate Markdown
  98. md := blackfriday.Markdown([]byte(data), blackfriday.HtmlRenderer(htmlFlags, "", ""), mdExtensions)
  99. // Strip out bad HTML
  100. policy := bluemonday.UGCPolicy()
  101. policy.AllowAttrs("class", "id").Globally()
  102. outHTML := string(policy.SanitizeBytes(md))
  103. outHTML = markeddownReg.ReplaceAllString(outHTML, "$1")
  104. outHTML = strings.TrimRightFunc(outHTML, unicode.IsSpace)
  105. return outHTML
  106. }
  107. func postTitle(content, friendlyId string) string {
  108. const maxTitleLen = 80
  109. // Strip HTML tags with bluemonday's StrictPolicy, then unescape the HTML
  110. // entities added in by sanitizing the content.
  111. content = html.UnescapeString(bluemonday.StrictPolicy().Sanitize(content))
  112. content = strings.TrimLeftFunc(stripmd.Strip(content), unicode.IsSpace)
  113. eol := strings.IndexRune(content, '\n')
  114. blankLine := strings.Index(content, "\n\n")
  115. if blankLine != -1 && blankLine <= eol && blankLine <= assumedTitleLen {
  116. return strings.TrimSpace(content[:blankLine])
  117. } else if utf8.RuneCountInString(content) <= maxTitleLen {
  118. return content
  119. }
  120. return friendlyId
  121. }
  122. // TODO: fix duplicated code from postTitle. postTitle is a widely used func we
  123. // don't have time to investigate right now.
  124. func friendlyPostTitle(content, friendlyId string) string {
  125. const maxTitleLen = 80
  126. // Strip HTML tags with bluemonday's StrictPolicy, then unescape the HTML
  127. // entities added in by sanitizing the content.
  128. content = html.UnescapeString(bluemonday.StrictPolicy().Sanitize(content))
  129. content = strings.TrimLeftFunc(stripmd.Strip(content), unicode.IsSpace)
  130. eol := strings.IndexRune(content, '\n')
  131. blankLine := strings.Index(content, "\n\n")
  132. if blankLine != -1 && blankLine <= eol && blankLine <= assumedTitleLen {
  133. return strings.TrimSpace(content[:blankLine])
  134. } else if eol == -1 && utf8.RuneCountInString(content) <= maxTitleLen {
  135. return content
  136. }
  137. title, truncd := parse.TruncToWord(parse.PostLede(content, true), maxTitleLen)
  138. if truncd {
  139. title += "..."
  140. }
  141. return title
  142. }
  143. func getSanitizationPolicy() *bluemonday.Policy {
  144. policy := bluemonday.UGCPolicy()
  145. policy.AllowAttrs("src", "style").OnElements("iframe", "video")
  146. policy.AllowAttrs("frameborder", "width", "height").Matching(bluemonday.Integer).OnElements("iframe")
  147. policy.AllowAttrs("allowfullscreen").OnElements("iframe")
  148. policy.AllowAttrs("controls", "loop", "muted", "autoplay").OnElements("video")
  149. policy.AllowAttrs("target").OnElements("a")
  150. policy.AllowAttrs("style", "class", "id").Globally()
  151. policy.AllowURLSchemes("http", "https", "mailto", "xmpp")
  152. return policy
  153. }
  // sanitizePost returns content with every '<' replaced by "&lt;". No other
  // character is altered.
  func sanitizePost(content string) string {
  	pieces := strings.Split(content, "<")
  	return strings.Join(pieces, "&lt;")
  }
  157. // postDescription generates a description based on the given post content,
  158. // title, and post ID. This doesn't consider a V2 post field, `title` when
  159. // choosing what to generate. In case a post has a title, this function will
  160. // fail, and logic should instead be implemented to skip this when there's no
  161. // title, like so:
  162. // var desc string
  163. // if title == "" {
  164. // desc = postDescription(content, title, friendlyId)
  165. // } else {
  166. // desc = shortPostDescription(content)
  167. // }
  168. func postDescription(content, title, friendlyId string) string {
  169. maxLen := 140
  170. if content == "" {
  171. content = "Write Freely is a painless, simple, federated blogging platform."
  172. } else {
  173. fmtStr := "%s"
  174. truncation := 0
  175. if utf8.RuneCountInString(content) > maxLen {
  176. // Post is longer than the max description, so let's show a better description
  177. fmtStr = "%s..."
  178. truncation = 3
  179. }
  180. if title == friendlyId {
  181. // No specific title was found; simply truncate the post, starting at the beginning
  182. content = fmt.Sprintf(fmtStr, strings.Replace(stringmanip.Substring(content, 0, maxLen-truncation), "\n", " ", -1))
  183. } else {
  184. // There was a title, so return a real description
  185. blankLine := strings.Index(content, "\n\n")
  186. if blankLine < 0 {
  187. blankLine = 0
  188. }
  189. truncd := stringmanip.Substring(content, blankLine, blankLine+maxLen-truncation)
  190. contentNoNL := strings.Replace(truncd, "\n", " ", -1)
  191. content = strings.TrimSpace(fmt.Sprintf(fmtStr, contentNoNL))
  192. }
  193. }
  194. return content
  195. }
  196. func shortPostDescription(content string) string {
  197. maxLen := 140
  198. fmtStr := "%s"
  199. truncation := 0
  200. if utf8.RuneCountInString(content) > maxLen {
  201. // Post is longer than the max description, so let's show a better description
  202. fmtStr = "%s..."
  203. truncation = 3
  204. }
  205. return strings.TrimSpace(fmt.Sprintf(fmtStr, strings.Replace(stringmanip.Substring(content, 0, maxLen-truncation), "\n", " ", -1)))
  206. }