A clean, Markdown-based publishing platform made for writers. Write together, and build a community. https://writefreely.org
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 

237 lines
8.2 KiB

  1. /*
  2. * Copyright © 2018 A Bunch Tell LLC.
  3. *
  4. * This file is part of WriteFreely.
  5. *
  6. * WriteFreely is free software: you can redistribute it and/or modify
  7. * it under the terms of the GNU Affero General Public License, included
  8. * in the LICENSE file in this source code package.
  9. */
  10. package writefreely
  11. import (
  12. "fmt"
  13. "html"
  14. "html/template"
  15. "regexp"
  16. "strings"
  17. "unicode"
  18. "unicode/utf8"
  19. "github.com/microcosm-cc/bluemonday"
  20. stripmd "github.com/writeas/go-strip-markdown"
  21. blackfriday "github.com/writeas/saturday"
  22. "github.com/writeas/web-core/stringmanip"
  23. "github.com/writeas/writefreely/config"
  24. "github.com/writeas/writefreely/parse"
  25. )
  26. var (
  27. blockReg = regexp.MustCompile("<(ul|ol|blockquote)>\n")
  28. endBlockReg = regexp.MustCompile("</([a-z]+)>\n</(ul|ol|blockquote)>")
  29. youtubeReg = regexp.MustCompile("(https?://www.youtube.com/embed/[a-zA-Z0-9\\-_]+)(\\?[^\t\n\f\r \"']+)?")
  30. titleElementReg = regexp.MustCompile("</?h[1-6]>")
  31. hashtagReg = regexp.MustCompile(`{{\[\[\|\|([^|]+)\|\|\]\]}}`)
  32. markeddownReg = regexp.MustCompile("<p>(.+)</p>")
  33. )
  34. func (p *Post) formatContent(cfg *config.Config, c *Collection, isOwner bool) {
  35. baseURL := c.CanonicalURL()
  36. // TODO: redundant
  37. if !isSingleUser {
  38. baseURL = "/" + c.Alias + "/"
  39. }
  40. p.HTMLTitle = template.HTML(applyBasicMarkdown([]byte(p.Title.String)))
  41. p.HTMLContent = template.HTML(applyMarkdown([]byte(p.Content), baseURL, cfg))
  42. if exc := strings.Index(string(p.Content), "<!--more-->"); exc > -1 {
  43. p.HTMLExcerpt = template.HTML(applyMarkdown([]byte(p.Content[:exc]), baseURL, cfg))
  44. }
  45. }
  46. func (p *PublicPost) formatContent(cfg *config.Config, isOwner bool) {
  47. p.Post.formatContent(cfg, &p.Collection.Collection, isOwner)
  48. }
  49. func applyMarkdown(data []byte, baseURL string, cfg *config.Config) string {
  50. return applyMarkdownSpecial(data, false, baseURL, cfg)
  51. }
  52. func applyMarkdownSpecial(data []byte, skipNoFollow bool, baseURL string, cfg *config.Config) string {
  53. mdExtensions := 0 |
  54. blackfriday.EXTENSION_TABLES |
  55. blackfriday.EXTENSION_FENCED_CODE |
  56. blackfriday.EXTENSION_AUTOLINK |
  57. blackfriday.EXTENSION_STRIKETHROUGH |
  58. blackfriday.EXTENSION_SPACE_HEADERS |
  59. blackfriday.EXTENSION_AUTO_HEADER_IDS
  60. htmlFlags := 0 |
  61. blackfriday.HTML_USE_SMARTYPANTS |
  62. blackfriday.HTML_SMARTYPANTS_DASHES
  63. if baseURL != "" {
  64. htmlFlags |= blackfriday.HTML_HASHTAGS
  65. }
  66. // Generate Markdown
  67. md := blackfriday.Markdown([]byte(data), blackfriday.HtmlRenderer(htmlFlags, "", ""), mdExtensions)
  68. if baseURL != "" {
  69. // Replace special text generated by Markdown parser
  70. tagPrefix := baseURL + "tag:"
  71. if cfg.App.Chorus {
  72. tagPrefix = "/read/t/"
  73. }
  74. md = []byte(hashtagReg.ReplaceAll(md, []byte("<a href=\""+tagPrefix+"$1\" class=\"hashtag\"><span>#</span><span class=\"p-category\">$1</span></a>")))
  75. }
  76. // Strip out bad HTML
  77. policy := getSanitizationPolicy()
  78. policy.RequireNoFollowOnLinks(!skipNoFollow)
  79. outHTML := string(policy.SanitizeBytes(md))
  80. // Strip newlines on certain block elements that render with them
  81. outHTML = blockReg.ReplaceAllString(outHTML, "<$1>")
  82. outHTML = endBlockReg.ReplaceAllString(outHTML, "</$1></$2>")
  83. // Remove all query parameters on YouTube embed links
  84. // TODO: make this more specific. Taking the nuclear approach here to strip ?autoplay=1
  85. outHTML = youtubeReg.ReplaceAllString(outHTML, "$1")
  86. return outHTML
  87. }
  88. func applyBasicMarkdown(data []byte) string {
  89. mdExtensions := 0 |
  90. blackfriday.EXTENSION_STRIKETHROUGH |
  91. blackfriday.EXTENSION_SPACE_HEADERS |
  92. blackfriday.EXTENSION_HEADER_IDS
  93. htmlFlags := 0 |
  94. blackfriday.HTML_SKIP_HTML |
  95. blackfriday.HTML_USE_SMARTYPANTS |
  96. blackfriday.HTML_SMARTYPANTS_DASHES
  97. // Generate Markdown
  98. md := blackfriday.Markdown([]byte(data), blackfriday.HtmlRenderer(htmlFlags, "", ""), mdExtensions)
  99. // Strip out bad HTML
  100. policy := bluemonday.UGCPolicy()
  101. policy.AllowAttrs("class", "id").Globally()
  102. outHTML := string(policy.SanitizeBytes(md))
  103. outHTML = markeddownReg.ReplaceAllString(outHTML, "$1")
  104. outHTML = strings.TrimRightFunc(outHTML, unicode.IsSpace)
  105. return outHTML
  106. }
  107. func postTitle(content, friendlyId string) string {
  108. const maxTitleLen = 80
  109. // Strip HTML tags with bluemonday's StrictPolicy, then unescape the HTML
  110. // entities added in by sanitizing the content.
  111. content = html.UnescapeString(bluemonday.StrictPolicy().Sanitize(content))
  112. content = strings.TrimLeftFunc(stripmd.Strip(content), unicode.IsSpace)
  113. eol := strings.IndexRune(content, '\n')
  114. blankLine := strings.Index(content, "\n\n")
  115. if blankLine != -1 && blankLine <= eol && blankLine <= assumedTitleLen {
  116. return strings.TrimSpace(content[:blankLine])
  117. } else if utf8.RuneCountInString(content) <= maxTitleLen {
  118. return content
  119. }
  120. return friendlyId
  121. }
  122. // TODO: fix duplicated code from postTitle. postTitle is a widely used func we
  123. // don't have time to investigate right now.
  124. func friendlyPostTitle(content, friendlyId string) string {
  125. const maxTitleLen = 80
  126. // Strip HTML tags with bluemonday's StrictPolicy, then unescape the HTML
  127. // entities added in by sanitizing the content.
  128. content = html.UnescapeString(bluemonday.StrictPolicy().Sanitize(content))
  129. content = strings.TrimLeftFunc(stripmd.Strip(content), unicode.IsSpace)
  130. eol := strings.IndexRune(content, '\n')
  131. blankLine := strings.Index(content, "\n\n")
  132. if blankLine != -1 && blankLine <= eol && blankLine <= assumedTitleLen {
  133. return strings.TrimSpace(content[:blankLine])
  134. } else if eol == -1 && utf8.RuneCountInString(content) <= maxTitleLen {
  135. return content
  136. }
  137. title, truncd := parse.TruncToWord(parse.PostLede(content, true), maxTitleLen)
  138. if truncd {
  139. title += "..."
  140. }
  141. return title
  142. }
  143. func getSanitizationPolicy() *bluemonday.Policy {
  144. policy := bluemonday.UGCPolicy()
  145. policy.AllowAttrs("src", "style").OnElements("iframe", "video", "audio")
  146. policy.AllowAttrs("src", "type").OnElements("source")
  147. policy.AllowAttrs("frameborder", "width", "height").Matching(bluemonday.Integer).OnElements("iframe")
  148. policy.AllowAttrs("allowfullscreen").OnElements("iframe")
  149. policy.AllowAttrs("controls", "loop", "muted", "autoplay").OnElements("video")
  150. policy.AllowAttrs("controls", "loop", "muted", "autoplay", "preload").OnElements("audio")
  151. policy.AllowAttrs("target").OnElements("a")
  152. policy.AllowAttrs("title").OnElements("abbr")
  153. policy.AllowAttrs("style", "class", "id").Globally()
  154. policy.AllowURLSchemes("http", "https", "mailto", "xmpp")
  155. return policy
  156. }
  157. func sanitizePost(content string) string {
  158. return strings.Replace(content, "<", "&lt;", -1)
  159. }
  160. // postDescription generates a description based on the given post content,
  161. // title, and post ID. This doesn't consider a V2 post field, `title` when
  162. // choosing what to generate. In case a post has a title, this function will
  163. // fail, and logic should instead be implemented to skip this when there's no
  164. // title, like so:
  165. // var desc string
  166. // if title == "" {
  167. // desc = postDescription(content, title, friendlyId)
  168. // } else {
  169. // desc = shortPostDescription(content)
  170. // }
  171. func postDescription(content, title, friendlyId string) string {
  172. maxLen := 140
  173. if content == "" {
  174. content = "WriteFreely is a painless, simple, federated blogging platform."
  175. } else {
  176. fmtStr := "%s"
  177. truncation := 0
  178. if utf8.RuneCountInString(content) > maxLen {
  179. // Post is longer than the max description, so let's show a better description
  180. fmtStr = "%s..."
  181. truncation = 3
  182. }
  183. if title == friendlyId {
  184. // No specific title was found; simply truncate the post, starting at the beginning
  185. content = fmt.Sprintf(fmtStr, strings.Replace(stringmanip.Substring(content, 0, maxLen-truncation), "\n", " ", -1))
  186. } else {
  187. // There was a title, so return a real description
  188. blankLine := strings.Index(content, "\n\n")
  189. if blankLine < 0 {
  190. blankLine = 0
  191. }
  192. truncd := stringmanip.Substring(content, blankLine, blankLine+maxLen-truncation)
  193. contentNoNL := strings.Replace(truncd, "\n", " ", -1)
  194. content = strings.TrimSpace(fmt.Sprintf(fmtStr, contentNoNL))
  195. }
  196. }
  197. return content
  198. }
  199. func shortPostDescription(content string) string {
  200. maxLen := 140
  201. fmtStr := "%s"
  202. truncation := 0
  203. if utf8.RuneCountInString(content) > maxLen {
  204. // Post is longer than the max description, so let's show a better description
  205. fmtStr = "%s..."
  206. truncation = 3
  207. }
  208. return strings.TrimSpace(fmt.Sprintf(fmtStr, strings.Replace(stringmanip.Substring(content, 0, maxLen-truncation), "\n", " ", -1)))
  209. }