From c61fa5c53bdb86783022ed3c17f7e60cd309c3fc Mon Sep 17 00:00:00 2001
From: Oscar Wallberg
Date: Sun, 17 May 2026 05:50:17 +0200
Subject: [PATCH] feat: tumblr grammar with highlights, injections, and tests

---
 grammar.js | 62 +-
 queries/tumblr/highlights.scm | 101 ++++
 queries/tumblr/injections.scm | 7 +
 src/grammar.json | 273 ++++++++-
 src/node-types.json | 209 ++++++-
 src/parser.c | 1038 +++++++++++++++++++++++++++++++--
 test/corpus/blocks.txt | 139 +++++
 test/corpus/content.txt | 38 ++
 test/corpus/lang.txt | 27 +
 test/corpus/variables.txt | 103 ++++
 tree-sitter.json | 8 +-
 11 files changed, 1950 insertions(+), 55 deletions(-)
 create mode 100644 queries/tumblr/highlights.scm
 create mode 100644 queries/tumblr/injections.scm
 create mode 100644 test/corpus/blocks.txt
 create mode 100644 test/corpus/content.txt
 create mode 100644 test/corpus/lang.txt
 create mode 100644 test/corpus/variables.txt

diff --git a/grammar.js b/grammar.js
index beb7148..b4f6b15 100644
--- a/grammar.js
+++ b/grammar.js
@@ -10,8 +10,64 @@
 export default grammar({
   name: "tumblr",
 
+  extras: _ => [],
+
   rules: {
-    // TODO: add the actual grammar rules
-    source_file: $ => "hello"
-  }
+    template: $ => repeat($._node),
+
+    _node: $ => choice(
+      $.content,
+      $.block_open,
+      $.block_close,
+      $.lang_tag,
+      $.variable,
+    ),
+
+    content: _ => token(prec(-1, /([^{]|\{[^A-Za-z/])+/)),
+
+    block_open: $ => seq(
+      $._block_open_start,
+      $.block_name,
+      optional($.attributes),
+      "}",
+    ),
+
+    block_close: $ => seq(
+      $._block_close_start,
+      $.block_name,
+      "}",
+    ),
+
+    lang_tag: $ => seq(
+      $._lang_start,
+      $.lang_text,
+      "}",
+    ),
+
+    variable: $ => seq(
+      "{",
+      choice(
+        seq($.variable_name, optional(seq("-", $.variable_modifier))),
+        seq($.variable_prefix, ":", $.prefix_argument),
+      ),
+      "}",
+    ),
+
+    attributes: $ => repeat1($.attribute),
+    attribute: $ => seq($._space, $.attribute_name, "=", $.attribute_value),
+    attribute_name: _ => /[A-Za-z_][A-Za-z0-9_-]*/,
+    attribute_value: _ => /"[^"]*"/,
+
+    block_name: _ => /[A-Za-z][A-Za-z0-9_]*/,
+    variable_name: _ => /[A-Z][A-Za-z0-9_]*/,
+    variable_modifier: _ => /[A-Za-z0-9]+/,
+    variable_prefix: _ => choice("text", "color", "font", "image"),
+    prefix_argument: _ => /[A-Za-z][A-Za-z0-9 _-]*/,
+    lang_text: _ => /[^}]+/,
+
+    _block_open_start: _ => /\{[Bb][Ll][Oo][Cc][Kk]:/,
+    _block_close_start: _ => /\{\/[Bb][Ll][Oo][Cc][Kk]:/,
+    _lang_start: _ => /\{[Ll][Aa][Nn][Gg]:/,
+    _space: _ => /[ \t]+/,
+  },
 });
diff --git a/queries/tumblr/highlights.scm b/queries/tumblr/highlights.scm
new file mode 100644
index 0000000..c52d584
--- /dev/null
+++ b/queries/tumblr/highlights.scm
@@ -0,0 +1,101 @@
+; Punctuation
+"{" @punctuation.bracket
+"}" @punctuation.bracket
+
+; Keywords that introduce a tag form. The grammar exposes the opening
+; sequence as a hidden node, so capture the parent and let the editor
+; colour the leading {block:, {/block:, {lang: literally via the
+; tokenizer. The colon delimiter inside a variable_prefix tag is captured
+; below.
+":" @punctuation.delimiter
+"-" @punctuation.delimiter
+"=" @operator
+
+; Block names. Generic fallback first: later patterns take precedence, so
+; If*/IfNot* toggles (named freely via <meta name="if:..."> tags, hence
+; the prefix match) and known data blocks override @function below.
+(block_name) @function
+
+((block_name) @keyword.conditional
+ (#match? @keyword.conditional "^[Ii]f([Nn]ot)?[A-Z]"))
+
+((block_name) @function.builtin
+ (#any-of? @function.builtin
+   "Album" "AlbumArt" "Answer" "Answerer" "Aperture" "Artist" "AskEnabled"
+   "Audio" "AudioEmbed" "AudioPlayer" "Author" "Camera" "Caption" "Chat"
+   "ContentSource" "CurrentPage" "Date" "DayPage" "DayPagination"
+   "Description" "Even" "Excerpt" "Exif" "Exposure" "ExternalAudio"
+   "FeaturedTags" "FocalLength" "Followed" "Following" "GroupMember"
+   "GroupMembers" "HasAvatar" "HasFeaturedTags" "HasPages" "HasPermalink"
+   "HasTags" "HideAvatar" "HideDescription" "HideFromSearchEnabled"
+   "HideHeaderImage" "HideTitle" "HighRes" "HomePage" "Host" "IndexPage"
+   "IsActive" "IsDeactivated" "isOriginalEntry" "JumpPage" "Label"
+   "LikeCount" "Likes" "Lines" "Link" "LinkURL" "More" "NewDayDate"
+   "NextDayPage" "NextPage" "NextPost" "NoLikes" "NoSearchResults"
+   "NoSourceLogo" "NoteCount" "NotReblog" "Odd" "Pages" "Pagination"
+   "Panorama" "PermalinkPage" "PermalinkPagination" "Photo" "Photos"
+   "Photoset" "PinnedPostLabel" "PlayCount" "Post5" "PostNotes" "Posts"
+   "PostSummary" "PostTitle" "PreviousDayPage" "PreviousPage"
+   "PreviousPost" "Quote" "ReblogCount" "RebloggedFrom" "Reblogs"
+   "RelatedPosts" "ReplyCount" "SameDayDate" "SearchPage" "ShowAvatar"
+   "ShowDescription" "ShowHeaderImage" "ShowTitle" "Source" "SourceLogo"
+   "Submission" "SubmissionsEnabled" "TagPage" "Tags" "Text" "Thumbnail"
+   "Title" "TrackName" "Video" "VideoThumbnail" "VideoThumbnails"))
+
+; Variable names. Generic @variable first; known builtins override it below.
+(variable_name) @variable
+
+((variable_name) @variable.builtin
+ (#any-of? @variable.builtin
+   "AccentColor" "Album" "AlbumArtURL" "Alt" "AmPm" "Answer" "Answerer"
+   "AnswererPortraitURL" "Aperture" "Artist" "Asker" "AskerPortraitURL"
+   "AskLabel" "AudioEmbed" "AudioPlayer" "Author" "AvatarShape"
+   "BackgroundColor" "Beats" "BlackLogoURL" "BlogURL" "Body" "Camera"
+   "CapitalAmPm" "Caption" "CopyrightYears" "CurrentPage" "CustomCSS"
+   "DayOfMonth" "DayOfMonthSuffix" "DayOfMonthWithZero" "DayOfWeek"
+   "DayOfWeekNumber" "DayOfYear" "Description" "EmbedUrl" "Excerpt"
+   "Exposure" "ExternalAudioURL" "Favicon" "FocalLength" "FollowedName"
+   "FollowedPortraitURL" "FollowedTitle" "FollowedURL" "FormattedPlayCount"
+   "GroupMemberName" "GroupMemberPortraitURL" "GroupMemberTitle"
+   "GroupMemberURL" "HeaderImage" "Host" "JSDescription" "JSPhotosetLayout"
+   "JSPlaintextDescription" "Label" "Length" "LikeButton" "LikeCount"
+   "Likes" "Line" "LinkCloseTag" "LinkOpenTag" "LinkURL" "LogoHeight"
+   "LogoWidth" "MetaDescription" "Minutes" "Month" "MonthNumber"
+   "MonthNumberWithZero" "Name" "NextDayPage" "NextPage" "NextPost"
+   "NoteCount" "NoteCountWithLabel" "NPF" "PageNumber" "Permalink"
+   "PhotoAlt" "PhotoCount" "PhotoHeight" "Photoset" "PhotosetLayout"
+   "PhotoURL" "PhotoWidth" "PinnedPostLabel" "PlaintextName" "PlayCount"
+   "PlayCountWithLabel" "PortraitURL" "PostAuthorName"
+   "PostAuthorPortraitURL" "PostAuthorTitle" "PostAuthorURL" "PostID"
+   "PostNotes" "PostNotesURL" "PostSummary" "PostTitle" "PostType"
+   "PreviousDayPage" "PreviousPage" "PreviousPost" "Question" "Quote"
+   "RawAudioURL" "ReblogButton" "ReblogCount" "ReblogParentName"
+   "ReblogParentPortraitURL" "ReblogParentTitle" "ReblogParentURL"
+   "ReblogRootName" "ReblogRootPortraitURL" "ReblogRootTitle"
+   "ReblogRootURL" "RelativePermalink" "Replies" "ReplyCount" "RSS"
+   "SearchQuery" "SearchResultCount" "Seconds" "ShortDayOfWeek"
+   "ShortMonth" "ShortURL" "ShortYear" "Source" "SourceTitle" "SourceURL"
+   "SubmitLabel" "Submitter" "SubmitterPortraitURL" "SubmitterURL" "Tag"
+   "TagsAsClasses" "TagURL" "TagURLChrono" "Target" "Thumbnail" "TimeAgo"
+   "Timestamp" "Title" "TitleColor" "TitleFont" "TitleFontWeight"
+   "TotalPages" "TrackName" "URL" "URLEncodedPermalink"
+   "URLSafeSearchQuery" "URLSafeTag" "Username" "UserNumber" "Video"
+   "VideoEmbed" "VideoThumbnailURL" "WeekOfYear" "Year"))
+
+; Size suffix on URL-style variables. Named forms (HighRes, Panorama, ...)
+; read as constants; numeric forms (128, 75sq) override that as numbers.
+(variable_modifier) @constant.builtin
+((variable_modifier) @number
+ (#match? @number "^[0-9]+(sq)?$"))
+
+; text: / color: / font: / image: prefix and its argument.
+(variable_prefix) @keyword
+(prefix_argument) @variable
+
+; {lang:Translatable string}
+(lang_tag) @string.special
+(lang_text) @string
+
+; Block attributes: {block:Photoset rows="3"}
+(attribute_name) @attribute
+(attribute_value) @string
diff --git a/queries/tumblr/injections.scm b/queries/tumblr/injections.scm
new file mode 100644
index 0000000..f3ae2fb
--- /dev/null
+++ b/queries/tumblr/injections.scm
@@ -0,0 +1,7 @@
+; All non-tag content is HTML. injection.combined concatenates every
+; content node so an HTML tag opened in one chunk can close in another
+; (across a {block:Foo}...{/block:Foo}).
HTML's own injection queries +; then take over for + +--- + +(template + (content) + (variable + (variable_prefix) + (prefix_argument)) + (content)) diff --git a/test/corpus/lang.txt b/test/corpus/lang.txt new file mode 100644 index 0000000..010c4b2 --- /dev/null +++ b/test/corpus/lang.txt @@ -0,0 +1,27 @@ +================== +Simple lang tag +================== + +{lang:Source} + +--- + +(template + (content) + (lang_tag + (lang_text)) + (content)) + +================== +Lang tag with spaces and punctuation +================== + +{lang:Read more, please} + +--- + +(template + (content) + (lang_tag + (lang_text)) + (content)) diff --git a/test/corpus/variables.txt b/test/corpus/variables.txt new file mode 100644 index 0000000..51590bb --- /dev/null +++ b/test/corpus/variables.txt @@ -0,0 +1,103 @@ +================== +Plain variable +================== + +{Title} + +--- + +(template + (content) + (variable + (variable_name)) + (content)) + +================== +Variable with numeric size suffix +================== + +{PortraitURL-128} + +--- + +(template + (content) + (variable + (variable_name) + (variable_modifier)) + (content)) + +================== +Variable with named size suffix +================== + +{PhotoURL-HighRes} + +--- + +(template + (content) + (variable + (variable_name) + (variable_modifier)) + (content)) + +================== +Multiple variables with text between +================== + +

{Title}

+

{Description}

+ +--- + +(template + (content) + (variable + (variable_name)) + (content) + (variable + (variable_name)) + (content)) + +================== +Prefixed variable +================== + +{color:Background} {font:Body} {image:Header} {text:Tagline} + +--- + +(template + (content) + (variable + (variable_prefix) + (prefix_argument)) + (content) + (variable + (variable_prefix) + (prefix_argument)) + (content) + (variable + (variable_prefix) + (prefix_argument)) + (content) + (variable + (variable_prefix) + (prefix_argument)) + (content)) + +================== +Prefix argument with spaces +================== + +{text:Header Subtitle} + +--- + +(template + (content) + (variable + (variable_prefix) + (prefix_argument)) + (content)) diff --git a/tree-sitter.json b/tree-sitter.json index ffc48cf..a6696d7 100644 --- a/tree-sitter.json +++ b/tree-sitter.json @@ -20,13 +20,11 @@ "authors": [ { "name": "Oscar Wallberg", - "email": "oscar.wallberg@outlook.com", - "url": "" + "email": "oscar.wallberg@outlook.com" } ], "links": { - "repository": "git.owall.dev/warg/tree-sitter-tumblr", - "funding": "" + "repository": "https://git.owall.dev/warg/tree-sitter-tumblr" } }, "bindings": { @@ -39,4 +37,4 @@ "swift": false, "zig": false } -} \ No newline at end of file +}