feat: format message from telegram and upload attachments (#1924)

* feat: format message from telegram and download documents

* fix: remove bool in expression

* refactor: convert to markdown

* refactor: resolve remarks and add support for new message types

* refactor: resolve remarks

* feat: add test for mime type

---------

Co-authored-by: Александр Тумайкин <AATumaykin@tsum.ru>
This commit is contained in:
Alexandr Tumaykin 2023-07-13 19:18:44 +03:00 committed by GitHub
parent f074bb1be2
commit c5a1f4c839
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 422 additions and 63 deletions

View File

@ -0,0 +1,14 @@
package telegram
// Animation represents an animation file as decoded from a Telegram message
// (see Message.Animation).
type Animation struct {
	FileID string `json:"file_id"` // FileID is the identifier for this file, which can be used to download or reuse the file
	FileUniqueID string `json:"file_unique_id"` // FileUniqueID is the unique identifier for this file, which is supposed to be the same over time and for different bots. Can't be used to download or reuse the file.
	Width int `json:"width"` // Width is the video width as defined by sender
	Height int `json:"height"` // Height is the video height as defined by sender
	Duration int `json:"duration"` // Duration of the video in seconds as defined by sender
	Thumbnail *PhotoSize `json:"thumb"` // Thumbnail is the animation thumbnail as defined by sender. NOTE(review): Bot API 6.6 renamed this JSON field from "thumb" to "thumbnail" — confirm the tag still matches incoming payloads.
	FileName string `json:"file_name"` // FileName is the original animation filename as defined by sender
	MimeType string `json:"mime_type"` // MimeType of the file as defined by sender
	FileSize int `json:"file_size"` // FileSize is the size of the file — presumably in bytes; TODO confirm against the Bot API reference
}

View File

@ -0,0 +1,38 @@
package telegram
import (
	"path"
	"strings"

	"github.com/usememos/memos/common/log"
	"go.uber.org/zap"
)
// Attachment is a file downloaded from Telegram together with the metadata
// that is later stored alongside it.
type Attachment struct {
	FileName string // FileName is the file name or path; GetMimeType inspects its extension
	MimeType string // MimeType is the explicit MIME type; when empty, GetMimeType derives one from FileName
	FileSize int64 // FileSize is the size recorded for the file — presumably in bytes; TODO confirm
	Data []byte // Data is the raw file content
}
// mimeTypes maps a file extension (with its leading dot) to the MIME type that
// GetMimeType falls back to when an Attachment has no explicit MimeType.
var mimeTypes = map[string]string{
	".jpg": "image/jpeg",
	".png": "image/png",
	".mp4": "video/mp4", // for video note
	".oga": "audio/ogg", // for voice
}
// GetMimeType returns the MIME type of the attachment.
//
// An explicit MimeType always wins. Otherwise the type is looked up in
// mimeTypes by the extension of FileName. The lookup ignores letter case, so
// "IMG.JPG" and "img.jpg" resolve to the same type. An unknown or missing
// extension is logged and reported as "application/octet-stream".
func (b Attachment) GetMimeType() string {
	if b.MimeType != "" {
		return b.MimeType
	}

	// Keys in mimeTypes are lowercase; normalise the extension so upper- or
	// mixed-case file names are still recognised.
	ext := strings.ToLower(path.Ext(b.FileName))
	if mime, ok := mimeTypes[ext]; ok {
		return mime
	}

	// Handle unknown file extension
	log.Warn("Unknown file type for ", zap.String("filename", b.FileName))
	return "application/octet-stream"
}

View File

@ -0,0 +1,83 @@
package telegram
import (
"testing"
"github.com/stretchr/testify/require"
)
// TestGetMimeType checks Attachment.GetMimeType in two modes: with an explicit
// MimeType (which must be returned unchanged) and without one (where the type
// is inferred from the file extension, falling back to
// "application/octet-stream" for unknown extensions).
//
// Each case runs as its own named subtest so a failure identifies the
// offending input and does not prevent the remaining cases from running.
func TestGetMimeType(t *testing.T) {
	tests := []struct {
		mimeType string
		fileName string
		expected string
	}{
		// Explicit MIME type is returned as-is.
		{
			fileName: "file.jpg",
			mimeType: "image/jpeg",
			expected: "image/jpeg",
		},
		{
			fileName: "file.png",
			mimeType: "image/png",
			expected: "image/png",
		},
		{
			fileName: "file.pdf",
			mimeType: "application/pdf",
			expected: "application/pdf",
		},
		{
			fileName: "file.php",
			mimeType: "application/x-php",
			expected: "application/x-php",
		},
		{
			fileName: "file.xlsx",
			mimeType: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
			expected: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
		},
		{
			fileName: "file.oga",
			mimeType: "audio/ogg",
			expected: "audio/ogg",
		},
		// No MIME type: inferred from the extension.
		{
			fileName: "file.jpg",
			expected: "image/jpeg",
		},
		{
			fileName: "file.png",
			expected: "image/png",
		},
		{
			fileName: "file.mp4",
			expected: "video/mp4",
		},
		{
			fileName: "file.pdf",
			expected: "application/octet-stream",
		},
		{
			fileName: "file.oga",
			expected: "audio/ogg",
		},
		{
			fileName: "file.xlsx",
			expected: "application/octet-stream",
		},
		{
			fileName: "file.txt",
			expected: "application/octet-stream",
		},
	}

	for _, test := range tests {
		mode := "inferred"
		if test.mimeType != "" {
			mode = "explicit"
		}

		t.Run(test.fileName+"_"+mode, func(t *testing.T) {
			attachment := Attachment{
				FileName: test.fileName,
				MimeType: test.mimeType,
			}

			require.Equal(t, test.expected, attachment.GetMimeType())
		})
	}
}

14
plugin/telegram/audio.go Normal file
View File

@ -0,0 +1,14 @@
package telegram
// Audio represents an audio file to be treated as music by the Telegram clients.
type Audio struct {
	FileID string `json:"file_id"` // FileID is an identifier for this file, which can be used to download or reuse the file
	FileUniqueID string `json:"file_unique_id"` // FileUniqueID is the unique identifier for this file, which is supposed to be the same over time and for different bots. Can't be used to download or reuse the file.
	Duration int `json:"duration"` // Duration of the audio in seconds as defined by sender
	Performer string `json:"performer"` // Performer of the audio as defined by sender or by audio tags
	Title string `json:"title"` // Title of the audio as defined by sender or by audio tags
	FileName string `json:"file_name"` // FileName is the original filename as defined by sender
	MimeType string `json:"mime_type"` // MimeType of the file as defined by sender
	FileSize int `json:"file_size"` // FileSize is the size of the file — presumably in bytes; TODO confirm against the Bot API reference
	Thumbnail *PhotoSize `json:"thumb"` // Thumbnail is the album cover to which the music file belongs. NOTE(review): Bot API 6.6 renamed this JSON field from "thumb" to "thumbnail" — confirm the tag still matches incoming payloads.
}

View File

@ -13,7 +13,7 @@ import (
// Handler is the callback interface the Bot dispatches to. The message
// handling code calls MessageHandle once per message (or once per media group)
// together with whatever was downloaded for it.
type Handler interface {
	BotToken(ctx context.Context) string
	MessageHandle(ctx context.Context, bot *Bot, message Message, blobs map[string][]byte) error
	MessageHandle(ctx context.Context, bot *Bot, message Message, attachments []Attachment) error
	CallbackQueryHandle(ctx context.Context, bot *Bot, callbackQuery CallbackQuery) error
}
@ -65,9 +65,9 @@ func (b *Bot) Start(ctx context.Context) {
if update.Message != nil {
message := *update.Message
// skip message other than text or photo
if message.Text == nil && message.Photo == nil {
_, err := b.SendReplyMessage(ctx, message.Chat.ID, message.MessageID, "Only text or photo message be supported")
// skip unsupported message
if !message.IsSupported() {
_, err := b.SendReplyMessage(ctx, message.Chat.ID, message.MessageID, "Supported messages: animation, audio, text, document, photo, video, video note, voice, other messages with caption")
if err != nil {
log.Error(fmt.Sprintf("fail to telegram.SendReplyMessage for messageID=%d", message.MessageID), zap.Error(err))
}

View File

@ -1,9 +1,19 @@
package telegram
// ChatType is the kind of chat reported in Chat.Type.
type ChatType string

// Known chat kinds.
//
// NOTE(review): these are untyped string constants rather than ChatType
// values; they still compare directly against a ChatType (as GetMessageLink
// does), but consider typing them if no caller relies on them being plain
// strings.
const (
	Private = "private"
	Group = "group"
	SuperGroup = "supergroup"
	Channel = "channel"
)
// Chat describes a Telegram chat as decoded from the JSON payload.
type Chat struct {
	ID int `json:"id"` // ID identifies the chat; it is the target passed to SendReplyMessage
	Title string `json:"title"`
	Type string `json:"type"`
	FirstName string `json:"first_name"`
	LastName string `json:"last_name"`
	Title string `json:"title"` // Title for supergroups, channels and group chats
	Type ChatType `json:"type"` // Type of chat, can be either “private”, “group”, “supergroup” or “channel”
	FirstName string `json:"first_name"` // FirstName of the other party in a private chat
	LastName string `json:"last_name"` // LastName of the other party in a private chat
	UserName string `json:"username"` // UserName for private chats, supergroups and channels if available
}

View File

@ -0,0 +1,11 @@
package telegram
// Document represents a general file.
type Document struct {
	FileID string `json:"file_id"` // FileID is an identifier for this file, which can be used to download or reuse the file
	FileUniqueID string `json:"file_unique_id"` // FileUniqueID is the unique identifier for this file, which is supposed to be the same over time and for different bots. Can't be used to download or reuse the file.
	Thumbnail *PhotoSize `json:"thumb"` // Thumbnail is the document thumbnail as defined by sender. NOTE(review): Bot API 6.6 renamed this JSON field from "thumb" to "thumbnail" — confirm the tag still matches incoming payloads.
	FileName string `json:"file_name"` // FileName is the original filename as defined by sender
	MimeType string `json:"mime_type"` // MimeType of the file as defined by sender
	FileSize int `json:"file_size"` // FileSize is the size of the file — presumably in bytes; TODO confirm against the Bot API reference
}

View File

@ -8,18 +8,72 @@ import (
"strings"
)
// downloadFileId download file with fileID, return the filepath and blob.
func (b *Bot) downloadFileID(ctx context.Context, fileID string) (string, []byte, error) {
file, err := b.GetFile(ctx, fileID)
if err != nil {
return "", nil, err
}
blob, err := b.downloadFilepath(ctx, file.FilePath)
if err != nil {
return "", nil, err
// downloadAttachment downloads the single file carried by message, if any.
//
// The first matching media kind wins, checked in this order: photo, animation,
// audio, document, video, video note, voice. It returns (nil, nil) when the
// message carries none of them, and the download error otherwise.
func (b *Bot) downloadAttachment(ctx context.Context, message *Message) (*Attachment, error) {
	var fileID, fileName, mimeType string
	switch {
	case len(message.Photo) > 0:
		// Photos carry neither a file name nor a MIME type; both stay empty.
		fileID = message.GetMaxPhotoFileID()
	// Animation is checked before Document: per the Message field comment, the
	// document field is also set whenever animation is set.
	case message.Animation != nil:
		fileID = message.Animation.FileID
		fileName = message.Animation.FileName
		mimeType = message.Animation.MimeType
	case message.Audio != nil:
		fileID = message.Audio.FileID
		fileName = message.Audio.FileName
		mimeType = message.Audio.MimeType
	case message.Document != nil:
		fileID = message.Document.FileID
		fileName = message.Document.FileName
		mimeType = message.Document.MimeType
	case message.Video != nil:
		fileID = message.Video.FileID
		fileName = message.Video.FileName
		mimeType = message.Video.MimeType
	case message.VideoNote != nil:
		// VideoNote has no FileName or MimeType field.
		fileID = message.VideoNote.FileID
	case message.Voice != nil:
		// Voice has no FileName field.
		fileID = message.Voice.FileID
		mimeType = message.Voice.MimeType
	}
	return file.FilePath, blob, nil
	// Nothing to download for this message.
	if fileID == "" {
		return nil, nil
	}
	attachment, err := b.downloadFileID(ctx, fileID)
	if err != nil {
		return nil, err
	}
	// Sender-provided metadata, when present, overrides what downloadFileID
	// filled in (the server file path and an empty MIME type).
	if fileName != "" {
		attachment.FileName = fileName
	}
	if mimeType != "" {
		attachment.MimeType = mimeType
	}
	return attachment, nil
}
// downloadFileID resolves fileID through GetFile, downloads the file content
// and returns both as an Attachment. FileName is set to the file path reported
// by GetFile and FileSize to the reported size; MimeType is left empty.
func (b *Bot) downloadFileID(ctx context.Context, fileID string) (*Attachment, error) {
	info, err := b.GetFile(ctx, fileID)
	if err != nil {
		return nil, err
	}

	content, err := b.downloadFilepath(ctx, info.FilePath)
	if err != nil {
		return nil, err
	}

	return &Attachment{
		FileName: info.FilePath,
		FileSize: info.FileSize,
		Data:     content,
	}, nil
}
// downloadFilepath downloads the file at filepath; the filepath can be obtained by calling GetFile.

View File

@ -2,24 +2,23 @@ package telegram
import (
"context"
"fmt"
)
// handleSingleMessages handle single messages not belongs to group.
func (b *Bot) handleSingleMessages(ctx context.Context, messages []Message) error {
for _, message := range messages {
var blobs map[string][]byte
var attachments []Attachment
// download blob if provided
if len(message.Photo) > 0 {
filepath, blob, err := b.downloadFileID(ctx, message.GetMaxPhotoFileID())
for _, message := range messages {
attachment, err := b.downloadAttachment(ctx, &message)
if err != nil {
return err
}
blobs = map[string][]byte{filepath: blob}
if attachment != nil {
attachments = append(attachments, *attachment)
}
err := b.handler.MessageHandle(ctx, b, message, blobs)
err = b.handler.MessageHandle(ctx, b, message, attachments)
if err != nil {
return err
}
@ -32,7 +31,7 @@ func (b *Bot) handleSingleMessages(ctx context.Context, messages []Message) erro
func (b *Bot) handleGroupMessages(ctx context.Context, groupMessages []Message) error {
captions := make(map[string]string, len(groupMessages))
messages := make(map[string]Message, len(groupMessages))
blobs := make(map[string]map[string][]byte, len(groupMessages))
attachments := make(map[string][]Attachment, len(groupMessages))
// Group all captions, blobs and messages
for _, message := range groupMessages {
@ -44,14 +43,14 @@ func (b *Bot) handleGroupMessages(ctx context.Context, groupMessages []Message)
captions[groupID] += *message.Caption
}
filepath, blob, err := b.downloadFileID(ctx, message.GetMaxPhotoFileID())
attachment, err := b.downloadAttachment(ctx, &message)
if err != nil {
return fmt.Errorf("fail to downloadFileID")
return err
}
if _, found := blobs[groupID]; !found {
blobs[groupID] = make(map[string][]byte)
if attachment != nil {
attachments[groupID] = append(attachments[groupID], *attachment)
}
blobs[groupID][filepath] = blob
}
// Handle each group message
@ -59,7 +58,7 @@ func (b *Bot) handleGroupMessages(ctx context.Context, groupMessages []Message)
// replace Caption with all Caption in the group
caption := captions[groupID]
message.Caption = &caption
err := b.handler.MessageHandle(ctx, b, message, blobs[groupID])
err := b.handler.MessageHandle(ctx, b, message, attachments[groupID])
if err != nil {
return err
}

View File

@ -1,14 +1,26 @@
package telegram
import "fmt"
// Message is a Telegram message as decoded from the JSON payload.
type Message struct {
	MessageID int `json:"message_id"`
	From User `json:"from"`
	Date int `json:"date"`
	Text *string `json:"text"`
	Chat *Chat `json:"chat"`
	MediaGroupID *string `json:"media_group_id"`
	Photo []PhotoSize `json:"photo"`
	Caption *string `json:"caption"`
	MessageID int `json:"message_id"` // MessageID is a unique message identifier inside this chat
	From User `json:"from"` // From is a sender, empty for messages sent to channels;
	Date int `json:"date"` // Date of the message was sent in Unix time
	Text *string `json:"text"` // Text is for text messages, the actual UTF-8 text of the message, 0-4096 characters;
	Chat *Chat `json:"chat"` // Chat is the conversation the message belongs to
	ForwardFromChat *Chat `json:"forward_from_chat"` // ForwardFromChat for messages forwarded from channels, information about the original channel;
	ForwardFromMessageID int `json:"forward_from_message_id"` // ForwardFromMessageID for messages forwarded from channels, identifier of the original message in the channel;
	MediaGroupID *string `json:"media_group_id"` // MediaGroupID is the unique identifier of a media message group this message belongs to;
	Photo []PhotoSize `json:"photo"` // Photo message is a photo, available sizes of the photo;
	Caption *string `json:"caption"` // Caption for the animation, audio, document, photo, video or voice, 0-1024 characters;
	Entities []MessageEntity `json:"entities"` // Entities are for text messages, special entities like usernames, URLs, bot commands, etc. that appear in the text;
	CaptionEntities []MessageEntity `json:"caption_entities"` // CaptionEntities are the entities applied to Caption when it is converted to markdown;
	Document *Document `json:"document"` // Document message is a general file, information about the file;
	Video *Video `json:"video"` // Video message is a video, information about the video;
	VideoNote *VideoNote `json:"video_note"` // VideoNote message is a video note, information about the video message;
	Voice *Voice `json:"voice"` // Voice message is a voice message, information about the file;
	Audio *Audio `json:"audio"` // Audio message is an audio file, information about the file;
	Animation *Animation `json:"animation"` // Animation message is an animation, information about the animation. For backward compatibility, when this field is set, the document field will also be set;
}
func (m Message) GetMaxPhotoFileID() string {
@ -22,3 +34,16 @@ func (m Message) GetMaxPhotoFileID() string {
return photoSize.FileID
}
// GetMessageLink returns the public https://t.me link to the original post of
// a message forwarded from a channel.
//
// It returns "" when the message was not forwarded from a chat, when the
// source chat is not a channel, or when the channel has no username: without
// a username the result would be the malformed "https://t.me//<id>".
func (m Message) GetMessageLink() string {
	source := m.ForwardFromChat
	if source == nil || source.Type != Channel || source.UserName == "" {
		return ""
	}

	return fmt.Sprintf("https://t.me/%s/%d", source.UserName, m.ForwardFromMessageID)
}
// IsSupported reports whether the message carries anything the bot can turn
// into a memo: text, a caption, or one of the downloadable media kinds.
//
// Photo is tested with len rather than against nil so that an empty photo
// list does not count as content, matching the check downloadAttachment uses
// before it downloads a photo.
func (m Message) IsSupported() bool {
	return m.Text != nil || m.Caption != nil || m.Document != nil || len(m.Photo) > 0 || m.Video != nil ||
		m.Voice != nil || m.VideoNote != nil || m.Audio != nil || m.Animation != nil
}

View File

@ -0,0 +1,31 @@
package telegram
// MessageEntityType is the kind of a MessageEntity, as carried in its Type field.
type MessageEntityType string

// Known entity kinds.
//
// NOTE(review): these are untyped string constants rather than
// MessageEntityType values; they still work as case labels when switching on
// a MessageEntityType, but consider typing them if no caller relies on them
// being plain strings.
const (
	Mention = "mention" // “mention” (@username)
	Hashtag = "hashtag" // “hashtag” (#hashtag)
	CashTag = "cashtag" // “cashtag” ($USD)
	BotCommand = "bot_command" // “bot_command” (/start@jobs_bot)
	URL = "url" // “url” (https://telegram.org)
	Email = "email" // “email” (do-not-reply@telegram.org)
	PhoneNumber = "phone_number" // “phone_number” (+1-212-555-0123)
	Bold = "bold" // “bold” (bold text)
	Italic = "italic" // “italic” (italic text)
	Underline = "underline" // “underline” (underlined text)
	Strikethrough = "strikethrough" // “strikethrough” (strikethrough text)
	Code = "code" // “code” (monowidth string)
	Pre = "pre" // “pre” (monowidth block)
	TextLink = "text_link" // “text_link” (for clickable text URLs)
	TextMention = "text_mention" // “text_mention” (for users without usernames)
)
// MessageEntity represents one special entity in a text message.
type MessageEntity struct {
	Type MessageEntityType `json:"type"` // Type of the entity.
	Offset int `json:"offset"` // Offset in UTF-16 code units to the start of the entity
	Length int `json:"length"` // Length of the entity, counted in the same units as Offset (the end is computed as Offset+Length)
	URL string `json:"url"` // URL for “text_link” only, url that will be opened after user taps on the text
	User *User `json:"user"` // User for “text_mention” only, the mentioned user
	Language string `json:"language"` // Language for “pre” only, the programming language of the entity text
}

14
plugin/telegram/video.go Normal file
View File

@ -0,0 +1,14 @@
package telegram
// Video represents a video file.
type Video struct {
	FileID string `json:"file_id"` // FileID is the identifier for this file, which can be used to download or reuse the file
	FileUniqueID string `json:"file_unique_id"` // FileUniqueID is the unique identifier for this file, which is supposed to be the same over time and for different bots. Can't be used to download or reuse the file.
	Width int `json:"width"` // Width is the video width as defined by sender
	Height int `json:"height"` // Height is the video height as defined by sender
	Duration int `json:"duration"` // Duration of the video in seconds as defined by sender
	Thumbnail *PhotoSize `json:"thumb"` // Thumbnail is the video thumbnail. NOTE(review): Bot API 6.6 renamed this JSON field from "thumb" to "thumbnail" — confirm the tag still matches incoming payloads.
	FileName string `json:"file_name"` // FileName is the original filename as defined by sender
	MimeType string `json:"mime_type"` // MimeType of a file as defined by sender
	FileSize int `json:"file_size"` // FileSize is the size of the file — presumably in bytes; TODO confirm against the Bot API reference
}

View File

@ -0,0 +1,11 @@
package telegram
// VideoNote represents a video message. It has no file name or MIME type
// field of its own.
type VideoNote struct {
	FileID string `json:"file_id"` // FileID is the identifier for this file, which can be used to download or reuse the file
	FileUniqueID string `json:"file_unique_id"` // FileUniqueID is the unique identifier for this file, which is supposed to be the same over time and for different bots. Can't be used to download or reuse the file.
	Length int `json:"length"` // Length is the video width and height (diameter of the video message) as defined by sender
	Duration int `json:"duration"` // Duration of the video in seconds as defined by sender
	Thumbnail *PhotoSize `json:"thumb,omitempty"` // Thumbnail is the video thumbnail. NOTE(review): Bot API 6.6 renamed this JSON field from "thumb" to "thumbnail" — confirm the tag still matches incoming payloads.
	FileSize int `json:"file_size"` // FileSize is the size of the file — presumably in bytes; TODO confirm against the Bot API reference
}

10
plugin/telegram/voice.go Normal file
View File

@ -0,0 +1,10 @@
package telegram
// Voice represents a voice note. It has no file name field of its own.
type Voice struct {
	FileID string `json:"file_id"` // FileID is the identifier for this file, which can be used to download or reuse the file
	FileUniqueID string `json:"file_unique_id"` // FileUniqueID is the unique identifier for this file, which is supposed to be the same over time and for different bots. Can't be used to download or reuse the file.
	Duration int `json:"duration"` // Duration of the audio in seconds as defined by sender
	MimeType string `json:"mime_type"` // MimeType of the file as defined by sender
	FileSize int `json:"file_size"` // FileSize is the size of the file — presumably in bytes; TODO confirm against the Bot API reference
}

View File

@ -4,8 +4,8 @@ import (
"context"
"encoding/json"
"fmt"
"path"
"strconv"
"unicode/utf16"
"github.com/pkg/errors"
apiv1 "github.com/usememos/memos/api/v1"
@ -30,7 +30,7 @@ const (
successMessage = "Success"
)
func (t *telegramHandler) MessageHandle(ctx context.Context, bot *telegram.Bot, message telegram.Message, blobs map[string][]byte) error {
func (t *telegramHandler) MessageHandle(ctx context.Context, bot *telegram.Bot, message telegram.Message, attachments []telegram.Attachment) error {
reply, err := bot.SendReplyMessage(ctx, message.Chat.ID, message.MessageID, workingMessage)
if err != nil {
return fmt.Errorf("fail to SendReplyMessage: %s", err)
@ -65,10 +65,15 @@ func (t *telegramHandler) MessageHandle(ctx context.Context, bot *telegram.Bot,
}
if message.Text != nil {
create.Content = *message.Text
create.Content = convertToMarkdown(*message.Text, message.Entities)
}
if blobs != nil && message.Caption != nil {
create.Content = *message.Caption
if message.Caption != nil {
create.Content = convertToMarkdown(*message.Caption, message.CaptionEntities)
}
if message.ForwardFromChat != nil {
create.Content += fmt.Sprintf("\n\n[Message link](%s)", message.GetMessageLink())
}
memoMessage, err := t.store.CreateMemo(ctx, create)
@ -78,21 +83,13 @@ func (t *telegramHandler) MessageHandle(ctx context.Context, bot *telegram.Bot,
}
// create resources
for filename, blob := range blobs {
// TODO support more
mime := "application/octet-stream"
switch path.Ext(filename) {
case ".jpg":
mime = "image/jpeg"
case ".png":
mime = "image/png"
}
for _, attachment := range attachments {
resource, err := t.store.CreateResource(ctx, &store.Resource{
CreatorID: creatorID,
Filename: filename,
Type: mime,
Size: int64(len(blob)),
Blob: blob,
Filename: attachment.FileName,
Type: attachment.GetMimeType(),
Size: attachment.FileSize,
Blob: attachment.Data,
})
if err != nil {
_, err := bot.EditMessage(ctx, message.Chat.ID, reply.MessageID, fmt.Sprintf("failed to CreateResource: %s", err), nil)
@ -158,3 +155,51 @@ func generateKeyboardForMemoID(id int) [][]telegram.InlineKeyboardButton {
return [][]telegram.InlineKeyboardButton{buttons}
}
// convertToMarkdown rewrites text as markdown according to messageEntities.
//
// Bold, italic, strikethrough, inline code, preformatted blocks and text
// links are wrapped in the matching markdown markup; every other entity type
// is left as plain text. Entity offsets and lengths are interpreted as UTF-16
// code units, so the text is walked rune by rune while tracking the UTF-16
// position. Entities that share a boundary nest: opening markers are emitted
// in entity order, closing markers in reverse order.
func convertToMarkdown(text string, messageEntities []telegram.MessageEntity) string {
	// insertions maps a UTF-16 offset to the markup emitted immediately before
	// the character at that offset (or at the very end of the text).
	insertions := make(map[int]string, 2*len(messageEntities))

	for _, e := range messageEntities {
		var before, after string

		// this is supported by the current markdown
		switch e.Type {
		case telegram.Bold:
			before = "**"
			after = "**"
		case telegram.Italic:
			before = "*"
			after = "*"
		case telegram.Strikethrough:
			before = "~~"
			after = "~~"
		case telegram.Code:
			before = "`"
			after = "`"
		case telegram.Pre:
			// The fences must sit on their own lines; otherwise the language
			// tag fuses with the first word of the code ("```gox := 1```").
			before = "```" + e.Language + "\n"
			after = "\n```"
		case telegram.TextLink:
			before = "["
			after = "](" + e.URL + ")"
		}

		if before == "" {
			continue
		}

		start, end := e.Offset, e.Offset+e.Length
		insertions[start] += before
		// Prepend so that an entity opened later is closed first.
		insertions[end] = after + insertions[end]
	}

	input := []rune(text)
	output := make([]rune, 0, len(input))
	utf16pos := 0

	for _, r := range input {
		output = append(output, []rune(insertions[utf16pos])...)
		output = append(output, r)

		// A rune outside the Basic Multilingual Plane occupies two UTF-16
		// code units (a surrogate pair); every other rune occupies one.
		if r1, _ := utf16.EncodeRune(r); utf16.IsSurrogate(r1) {
			utf16pos += 2
		} else {
			utf16pos++
		}
	}
	// Markup that closes exactly at the end of the text.
	output = append(output, []rune(insertions[utf16pos])...)

	return string(output)
}