feat: raster support (#22)

This commit is contained in:
Fathy Boundjadj 2022-11-18 10:48:10 +01:00 committed by GitHub
parent a81f5b38a3
commit 126e0da39e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 467 additions and 181 deletions

View File

@ -56,7 +56,7 @@ COPY package.json yarn.lock /app/
RUN yarn --production
COPY --from=html2svg-js /app/build /app/build
COPY --from=html2svg-binaries /runtime /runtime
COPY --from=html2svg-binaries /runtime /app/build/runtime
COPY /scripts/docker-entrypoint.sh /app/scripts/docker-entrypoint.sh
ENTRYPOINT ["/app/scripts/docker-entrypoint.sh"]

View File

@ -1,18 +1,20 @@
# `html2svg`
Convert HTML and `<canvas>` to SVG or PDF using Chromium. [Read the blog post](https://fathy.fr/html2svg).
Convert HTML and `<canvas>` to vector (SVG, PDF) or bitmap (PNG, JPEG, WebP) images using Chromium. [Read the blog post](https://fathy.fr/html2svg).
## Usage
```shell
# export to SVG
# Export to SVG
$ docker run fathyb/html2svg https://google.com > google.svg
$ docker run fathyb/html2svg https://google.com --format svg > google.svg
# export to PDF
# Export to PDF
$ docker run fathyb/html2svg https://google.com --format pdf > google.pdf
# show help
# Export to PNG
$ docker run fathyb/html2svg https://google.com --format png > google.png
# Display help
$ docker run fathyb/html2svg --help
Usage: html2svg [options] <url>
Usage: html2svg [options] [command] <url>
Arguments:
url URL to the web page to render
@ -22,8 +24,36 @@ Options:
-w, --wait <seconds> set the amount of seconds to wait between the page loaded event and taking the screenshot (default: 1)
-w, --width <width> set the viewport width in pixels (default: 1920)
-h, --height <height> set the viewport height in pixels (default: 1080)
-f, --format <format> set the output format, should one of these values: svg, pdf (default: "svg")
-f, --format <format> set the output format, should be one of these values: svg, pdf, png, jpg, webp (default: "svg")
--help display help for command
Commands:
serve [options]
```
### Server
An HTTP server is also provided, all CLI options are supported:
```shell
# Start a server on port 8080
$ docker run -p 8080:8080 fathyb/html2svg serve
# Export to SVG
$ curl -d http://google.fr http://localhost:8080 > google.svg
$ curl -d '{"url": "http://google.fr", "format": "svg"}' http://localhost:8080 > google.svg
# Export to PDF
$ curl -d '{"url": "http://google.fr", "format": "pdf"}' http://localhost:8080 > google.pdf
# Export to PNG
$ curl -d '{"url": "http://google.fr", "format": "png"}' http://localhost:8080 > google.png
# Display help
$ docker run fathyb/html2svg serve --help
Usage: html2svg serve [options]
Options:
-H, --host <hostname> set the hostname to listen on (default: "0.0.0.0")
-p, --port <port> set the port to listen on (default: 8080)
-u, --unix <path> set the unix socket to listen on
-h, --help display help for command
```
## Development

View File

@ -5,5 +5,4 @@ set -e
export DISPLAY=:99
Xvfb $DISPLAY -screen 0 1920x1080x24 &
/runtime/electron --no-sandbox --headless --disable-audio-output --mute-audio --force-color-profile=srgb --disable-dev-shm-usage /app/build/html2svg.js "$@"
node /app/build/html2svg.cli.js "$@"

View File

@ -1,16 +1,19 @@
diff --git a/content/renderer/render_frame_impl.cc b/content/renderer/render_frame_impl.cc
index 97cf24ad5f4a6..ab3f90752b11a 100644
index 97cf24ad5f4a6..ce12415534d20 100644
--- a/content/renderer/render_frame_impl.cc
+++ b/content/renderer/render_frame_impl.cc
@@ -256,6 +256,15 @@
@@ -256,6 +256,18 @@
#include "content/renderer/java/gin_java_bridge_dispatcher.h"
#endif
+// html2svg includes
+#include <stdlib.h>
+#include <iostream>
+#include "cc/paint/paint_recorder.h"
+#include "cc/paint/skia_paint_canvas.h"
+#include "third_party/skia/include/core/SkEncodedImageFormat.h"
+#include "third_party/skia/include/core/SkStream.h"
+#include "third_party/skia/include/core/SkSurface.h"
+#include "third_party/skia/include/docs/SkPDFDocument.h"
+#include "third_party/skia/include/svg/SkSVGCanvas.h"
+#include "third_party/skia/include/svg/SkSVGCanvas.h"
@ -18,59 +21,11 @@ index 97cf24ad5f4a6..ab3f90752b11a 100644
using base::Time;
using blink::ContextMenuData;
using blink::WebContentDecryptionModule;
@@ -3822,6 +3831,135 @@ void RenderFrameImpl::DidClearWindowObject() {
@@ -3822,6 +3834,126 @@ void RenderFrameImpl::DidClearWindowObject() {
for (auto& observer : observers_)
observer.DidClearWindowObject();
+
+ // A Skia stream writing to stdout
+ class StdoutStream : public SkWStream {
+ public:
+ ~StdoutStream() override {
+ flush();
+
+ delete[] fBytes;
+ }
+
+ bool write(const void* data, size_t size) override {
+ auto* buffer = static_cast<const char*>(data);
+ size_t remaining = size;
+ size_t bufferSize = 8 * 1024;
+
+ while (remaining != 0) {
+ ssize_t length = std::min(bufferSize - fBytesBuffered, remaining);
+
+ std::memcpy(&fBytes[fBytesBuffered], &buffer[size - remaining], length);
+
+ remaining -= length;
+ fBytesWritten += length;
+ fBytesBuffered += length;
+
+ if (fBytesBuffered == bufferSize) {
+ flush();
+ }
+ }
+
+ return true;
+ }
+
+ void flush() override {
+ if (::write(1, fBytes, fBytesBuffered) != -1) {
+ fBytesBuffered = 0;
+ fflush(stdout);
+ }
+ }
+
+ size_t bytesWritten() const override {
+ return fBytesWritten;
+ }
+
+ private:
+ char* fBytes = new char[8 * 1024];
+ size_t fBytesWritten = 0;
+ size_t fBytesBuffered = 0;
+ };
+
+ // Get access to the JS VM for this process (each tab is a process)
+ v8::Isolate *isolate = blink::MainThreadIsolate();
+ // Auto-clean v8 handles
@ -114,11 +69,12 @@ index 97cf24ad5f4a6..ab3f90752b11a 100644
+ );
+
+ // Create a memory stream to save the SVG content
+ StdoutStream stream;
+ SkDynamicMemoryWStream stream;
+ // Get the recording data
+ auto picture = recorder.finishRecordingAsPicture();
+ auto mode = args[1]->ToUint32(context).ToLocalChecked()->Value();
+
+ switch(args[1]->ToUint32(context).ToLocalChecked()->Value()) {
+ switch(mode) {
+ // SVG
+ case 0: {
+ picture->Playback(SkSVGCanvas::Make(rect, &stream).get());
@ -141,16 +97,54 @@ index 97cf24ad5f4a6..ab3f90752b11a 100644
+
+ break;
+ }
+ default: {
+ auto surface = SkSurface::MakeRasterN32Premul(width, height);
+
+ picture->Playback(surface->getCanvas());
+
+ auto img = surface->makeImageSnapshot();
+
+ assert(img != nullptr);
+
+ auto result = img->encodeToData(
+ [mode]() -> SkEncodedImageFormat {
+ switch(mode) {
+ case 3:
+ return SkEncodedImageFormat::kJPEG;
+ case 4:
+ return SkEncodedImageFormat::kWEBP;
+ default:
+ return SkEncodedImageFormat::kPNG;
+ }
+ }(),
+ 100
+ );
+
+ assert(result != nullptr);
+
+ stream.write(result->data(), result->size());
+
+ break;
+ }
+ }
+
+ auto buffer = v8::ArrayBuffer::New(isolate, stream.bytesWritten());
+
+ stream.copyTo(buffer->Data());
+ args.GetReturnValue().Set(buffer);
+ }
+ );
+
+ // Register the function as "getPageContentsAsSVG"
+ global->Set(
+ context,
+ v8::String::NewFromUtf8(isolate, "getPageContentsAsSVG").ToLocalChecked(),
+ fn->GetFunction(context).ToLocalChecked()
+ context,
+ v8::String::NewFromUtf8(isolate, "getPageContentsAsSVG").ToLocalChecked(),
+ fn->GetFunction(context).ToLocalChecked()
+ ).Check();
+
+ if (command_line.HasSwitch("html2svg-svg-mode")) {
+ setenv("html2svg_svg_mode", "true", 1);
+ }
}
void RenderFrameImpl::DidCreateDocumentElement() {

187
src/html2svg.cli.ts Normal file
View File

@ -0,0 +1,187 @@
import { join } from 'path'
import { tmpdir } from 'os'
import { program } from 'commander'
import { pipeline } from 'stream/promises'
import { mkdir, rm } from 'fs/promises'
import { randomBytes } from 'crypto'
import { ListenOptions } from 'net'
import { ChildProcess, spawn } from 'child_process'
import { IncomingMessage, request } from 'http'
import { Options } from './html2svg'
// Run the CLI only when this file is the program entry point.
if (require.main === module) {
    // Everything after this script's own path is a user argument; when the
    // script path is not found, fall back to skipping the first two entries.
    const scriptIndex = process.argv.findIndex((arg) =>
        arg.endsWith(__filename),
    )
    const userArgs = process.argv.slice(Math.max(2, scriptIndex + 1))

    const succeed = () => process.exit(0)
    const fail = (error: unknown) => {
        console.error(error)
        process.exit(1)
    }

    cli(userArgs).then(succeed).catch(fail)
}
/**
 * Parse the given user arguments and run the matching command.
 *
 * The default command renders `<url>`: it spawns a private server listening
 * on a randomly named unix socket inside the OS temp directory, sends it a
 * single request, and removes the socket file afterwards. The `serve`
 * sub-command starts a long-running server on a host/port or unix socket.
 *
 * @param args user arguments only (no node/script entries)
 */
export async function cli(args: string[]) {
    program
        .name('html2svg')
        .showHelpAfterError()
        .showSuggestionAfterError()
        .argument('<url>', 'URL to the web page to render')
        .option('-f, --full', 'capture the entire page')
        .option(
            '-w, --wait <seconds>',
            'set the amount of seconds to wait between the page loaded event and taking the screenshot',
            validateInt,
            1,
        )
        // No short alias: `-w` is already registered for `--wait` above, and
        // registering the same short flag twice is a flag clash (newer
        // Commander releases are documented to throw on it — verify against
        // the pinned version).
        .option(
            '--width <width>',
            'set the viewport width in pixels',
            validateInt,
            1920,
        )
        .option(
            '-h, --height <height>',
            'set the viewport height in pixels',
            validateInt,
            1080,
        )
        // No short alias: `-f` is already registered for `--full` above.
        .option(
            '--format <format>',
            'set the output format, should be one of these values: svg, pdf, png, jpg, webp',
            'svg',
        )
        .action(async (url, options) => {
            // 16 random bytes, each rendered as two base-36 characters.
            const id = Array.from(randomBytes(16))
                .map((x) => x.toString(36).padStart(2, '0'))
                .join('')
            const dir = join(tmpdir(), 'html2svg-server')
            const path = join(dir, `${id}.sock`)

            await mkdir(dir, { recursive: true })

            try {
                const server = serve({ path, log: false })

                // Fails as soon as either the server process or the request
                // fails; succeeds once both have completed.
                await Promise.all([
                    server.wait(),
                    callServer(url, options, server.process, path),
                ])
            } finally {
                // Always clean up the socket file, even on failure.
                await rm(path, { force: true })
            }
        })

    program
        .command('serve')
        .option(
            '-H, --host <hostname>',
            'set the hostname to listen on',
            '0.0.0.0',
        )
        .option(
            '-p, --port <port>',
            'set the port to listen on',
            validateInt,
            8080,
        )
        .option('-u, --unix <path>', 'set the unix socket to listen on')
        .action(
            // A unix socket path takes precedence over host/port.
            async ({ host, port, unix }) =>
                await serve(unix ? { path: unix } : { host, port }).wait(),
        )

    await program.parseAsync(args, { from: 'user' })
}
/**
 * Send a single render request to the server listening on `socketPath`,
 * stream the response to stdout, then stop the server process.
 *
 * The request is retried every 100 ms for up to 10 seconds while the socket
 * file does not exist yet (`ENOENT`), i.e. while the server is still starting.
 *
 * @param url URL of the page to render
 * @param options render options, sent alongside `url` in the JSON request body
 * @param server the spawned server process; it is killed before this function
 *   returns or throws
 * @param socketPath unix socket the server listens on
 * @returns the result of `server.kill()` once the response has been printed
 * @throws when the server does not come up within 10 seconds, when the
 *   request fails with anything other than `ENOENT`, or when printing the
 *   response fails
 */
async function callServer(
    url: string,
    options: Options,
    server: ChildProcess,
    socketPath: string,
) {
    const start = Date.now()

    try {
        while (Date.now() - start < 10_000) {
            const done = await new Promise<boolean>((resolve, reject) =>
                request({ method: 'POST', socketPath })
                    .on('error', (error: NodeJS.ErrnoException) => {
                        if (error?.code === 'ENOENT') {
                            // Socket file not created yet: retry later.
                            resolve(false)
                        } else {
                            reject(error)
                        }
                    })
                    .on('response', (res) =>
                        printRequest(res)
                            .then(() => resolve(true))
                            .catch(reject),
                    )
                    .end(JSON.stringify({ url, ...options })),
            )

            if (done) {
                return server.kill()
            }

            await sleep(100)
        }

        throw new Error('Timed out waiting for server to start')
    } catch (error) {
        // Do not leave the child process running when the request failed
        // or timed out.
        server.kill()

        throw error
    }
}
/**
 * Stream a server response body to stdout.
 *
 * @param res response received from the server
 * @throws when the response status code is not 200
 */
async function printRequest(res: IncomingMessage) {
    const { statusCode } = res

    if (statusCode === 200) {
        await pipeline(res, process.stdout)

        return
    }

    throw new Error(`Server error ${statusCode}`)
}
/**
 * Parse a base-10 integer from an option value.
 *
 * @param string raw option value
 * @returns the parsed integer
 * @throws when no integer can be parsed from the value
 */
function validateInt(string: string) {
    const parsed = Number.parseInt(string, 10)

    if (!Number.isNaN(parsed)) {
        return parsed
    }

    throw new Error(`Invalid number value: ${string}`)
}
/**
 * Resolve after the given delay.
 *
 * @param ms delay in milliseconds
 */
async function sleep(ms: number) {
    await new Promise<void>((done) => {
        setTimeout(done, ms)
    })
}
/**
 * Spawn the bundled Electron runtime running the html2svg server script.
 *
 * The listen options are handed to the child process as JSON through the
 * `HTML2SVG_SERVER_OPTIONS` environment variable; stdio is inherited.
 *
 * @param options listen options, plus an optional `log` flag
 * @returns the child process and a `wait()` helper that resolves when the
 *   process closes with code 0 and rejects on a spawn error, a signal, or a
 *   non-zero exit code
 */
function serve(options: ListenOptions & { log?: boolean }) {
    const electron = require.resolve('./runtime/electron')
    const script = require.resolve('./html2svg.server')
    const env = {
        ...process.env,
        HTML2SVG_SERVER_OPTIONS: JSON.stringify(options),
    }
    const child = spawn(electron, ['--no-sandbox', script], {
        stdio: 'inherit',
        env,
    })

    return {
        process: child,
        async wait() {
            await new Promise<void>((resolve, reject) => {
                child.on('error', reject)
                child.on('close', (code, signal) => {
                    if (signal) {
                        return reject(
                            new Error(`Server quit with signal ${signal}`),
                        )
                    }

                    if (code !== 0) {
                        return reject(
                            new Error(`Server quit with code ${code}`),
                        )
                    }

                    resolve()
                })
            })
        },
    }
}

88
src/html2svg.server.ts Normal file
View File

@ -0,0 +1,88 @@
import { createServer } from 'http'
import { ListenOptions } from 'net'
import { readStream } from './read-stream'
import { html2svg, Options } from './html2svg'
// Start the server only when this file is the program entry point. The listen
// options are read as JSON from the HTML2SVG_SERVER_OPTIONS environment
// variable (an empty object when unset).
if (require.main === module) {
    const options = JSON.parse(process.env.HTML2SVG_SERVER_OPTIONS ?? '{}')
    const { path, host, port, log } = options

    const announce = () => {
        // Logging is on unless explicitly disabled with `log: false`.
        if (log === false) {
            return
        }

        const address = path ? `unix socket ${path}` : `${host}:${port}`

        process.stderr.write(`Listening on ${address}\n`)
    }

    const fail = (error: unknown) => {
        console.error(error)
        process.exit(1)
    }

    server(options).then(announce).catch(fail)
}
/**
 * Start an HTTP server that renders pages on request.
 *
 * Only the `/` path is served (anything else gets a 404). The request body is
 * either a bare URL or a JSON value: a JSON string is treated as the URL, a
 * JSON object must carry a string `url` and may carry render options. Invalid
 * bodies get a 400, rendering failures a 500. Successful responses contain
 * the rendered document with a Content-Type matching the requested format.
 *
 * @param listen address or unix socket to listen on
 * @returns a promise that resolves once the server is listening and rejects
 *   if the server emits an error first
 */
export async function server(listen: ListenOptions) {
    const server = createServer((req, res) => {
        if (req.url !== '/') {
            return res.writeHead(404).end('Not Found')
        }

        readStream(req)
            .then(async (data) => {
                const body = parseOptions(parseJSON(data.toString('utf-8')))

                if (!body) {
                    return res.writeHead(400).end('Invalid request params')
                }

                const buffer = await html2svg(body.url, body.options)

                res.writeHead(200, {
                    'Content-Type': contentTypeFor(body.options?.format),
                }).end(buffer)
            })
            .catch((error) => {
                console.error('Internal server error', error)

                if (res.headersSent) {
                    // Too late for a status line: writeHead() would throw
                    // here, so just drop the connection.
                    res.destroy()
                } else {
                    res.writeHead(500).end('Internal Server Error')
                }
            })
    })

    await new Promise<void>((resolve, reject) =>
        server.on('error', reject).on('listening', resolve).listen(listen),
    )
}

/** Map a requested output format to the MIME type of the rendered document. */
function contentTypeFor(format: unknown): string {
    switch (format ?? 'svg') {
        case 'svg':
            return 'image/svg+xml'
        case 'pdf':
            return 'application/pdf'
        case 'png':
            return 'image/png'
        case 'jpg':
        case 'jpeg':
            return 'image/jpeg'
        case 'webp':
            return 'image/webp'
        default:
            return 'application/octet-stream'
    }
}
/**
 * Normalize a decoded request body into a URL and optional render options.
 *
 * @param data a non-empty string (used as the URL) or an object with a string
 *   `url` property; its remaining properties become the options
 * @returns the normalized request, or `null` when `data` has any other shape
 */
function parseOptions(data: any): null | { url: string; options?: Options } {
    switch (typeof data) {
        case 'string':
            return data ? { url: data } : null
        case 'object': {
            if (data === null) {
                return null
            }

            const { url, ...options } = data

            return typeof url === 'string' ? { url, options } : null
        }
        default:
            return null
    }
}
/**
 * Decode `data` as JSON, falling back to the raw string when it is not
 * valid JSON.
 */
function parseJSON(data: string) {
    let decoded: any = data

    try {
        decoded = JSON.parse(data)
    } catch {
        // Not JSON: keep the raw text.
    }

    return decoded
}

View File

@ -1,129 +1,110 @@
import { program } from 'commander'
import { app, BrowserWindow } from 'electron'
const entry = process.argv.find((a) => a.endsWith('html2svg.js'))
const index = entry ? process.argv.indexOf(entry) : -1
const args = process.argv.slice(Math.max(2, index + 1))
/** Rendering options accepted by `html2svg()`; every field is optional. */
export interface Options {
/** Capture the entire page instead of only the viewport height. */
full?: boolean
/** Seconds to wait between the page loaded event and taking the capture. */
wait?: number
/** Viewport width in pixels (`html2svg()` defaults it to 1920). */
width?: number
/** Viewport height in pixels (`html2svg()` defaults it to 1080). */
height?: number
/** Output format (`html2svg()` falls back to `'svg'` when omitted). */
format?: 'svg' | 'pdf' | 'png' | 'jpg' | 'webp'
}
program
.name('html2svg')
.showHelpAfterError()
.showSuggestionAfterError()
.argument('<url>', 'URL to the web page to render')
.option('-f, --full', 'capture the entire page')
.option(
'-w, --wait <seconds>',
'set the amount of seconds to wait between the page loaded event and taking the screenshot',
validateInt,
1,
)
.option(
'-w, --width <width>',
'set the viewport width in pixels',
validateInt,
1920,
)
.option(
'-h, --height <height>',
'set the viewport height in pixels',
validateInt,
1080,
)
.option(
'-f, --format <format>',
'set the output format, should one of these values: svg, pdf',
'svg',
)
.action(async (url, { full, wait, width, height, format }) => {
const mode = getMode(format)
app.dock?.hide()
app.disableHardwareAcceleration()
app.commandLine.appendSwitch('no-sandbox')
app.on('window-all-closed', () => {})
if (format === 'svg') {
process.env.html2svg_svg_mode = 'true'
}
export async function html2svg(
url: string,
{ full, wait, format, width = 1920, height = 1080 }: Options = {},
) {
const mode = getMode(format ?? 'svg')
app.dock?.hide()
app.disableHardwareAcceleration()
app.commandLine.appendSwitch('headless')
app.commandLine.appendSwitch('no-sandbox')
app.commandLine.appendSwitch('disable-gpu')
await app.whenReady()
await app.whenReady()
const args = [
'--mute-audio',
'--disable-audio-output',
'--disable-dev-shm-usage',
'--force-color-profile=srgb',
]
const page = new BrowserWindow({
width,
height,
show: false,
webPreferences: { sandbox: false },
})
if (mode === 0) {
args.push('--html2svg-svg-mode', '--disable-remote-fonts')
}
try {
await new Promise<void>((resolve, reject) =>
Promise.resolve()
.then(async () => {
const timeout = setTimeout(() => {
page.webContents.off('did-finish-load', listener)
const page = new BrowserWindow({
width,
height,
show: false,
webPreferences: {
sandbox: false,
offscreen: true,
additionalArguments: args,
},
})
reject(new Error('timeout'))
}, 10_000)
const listener = () => {
clearTimeout(timeout)
try {
await new Promise<void>((resolve, reject) =>
Promise.resolve()
.then(async () => {
const timeout = setTimeout(() => {
page.webContents.off('did-finish-load', listener)
resolve()
}
reject(new Error('timeout'))
}, 10_000)
const listener = () => {
clearTimeout(timeout)
page.webContents.once('did-finish-load', listener)
resolve()
}
await page.loadURL(url)
})
.catch(reject),
)
page.webContents.once('did-finish-load', listener)
await page.webContents.executeJavaScript(
`
new Promise(resolve => {
const style = document.createElement('style')
await page.loadURL(url)
})
.catch(reject),
)
style.innerHTML = trustedTypes
.createPolicy('html2svg/scrollbar-css', { createHTML: x => x })
.createHTML(\`
*::-webkit-scrollbar,
*::-webkit-scrollbar-track,
*::-webkit-scrollbar-thumb {
display: none;
}
\`)
const buffer: ArrayBuffer = await page.webContents.executeJavaScript(
`
new Promise(resolve => {
const style = document.createElement('style')
document.head.appendChild(style)
scrollTo({ top: document.body.scrollHeight })
style.innerHTML = trustedTypes
.createPolicy('html2svg/scrollbar-css', { createHTML: x => x })
.createHTML(\`
*::-webkit-scrollbar,
*::-webkit-scrollbar-track,
*::-webkit-scrollbar-thumb {
display: none;
}
\`)
requestAnimationFrame(() => {
scrollTo({ top: 0 })
document.head.appendChild(style)
scrollTo({ top: document.body.scrollHeight })
requestAnimationFrame(() =>
setTimeout(resolve, ${wait * 1000})
)
})
}).then(() =>
getPageContentsAsSVG(
${full ? 0 : height} * devicePixelRatio,
${mode},
document.title,
requestAnimationFrame(() => {
scrollTo({ top: 0 })
requestAnimationFrame(() =>
setTimeout(resolve, ${(wait ?? 0) * 1000})
)
})
}).then(() =>
getPageContentsAsSVG(
${full ? 0 : height} * devicePixelRatio,
${mode},
document.title,
)
`,
)
} finally {
page.destroy()
}
)
`,
)
process.exit(0)
})
.parseAsync(args, { from: 'user' })
.catch((error) => {
console.error(error)
process.exit(1)
})
return Buffer.from(buffer)
} finally {
page.destroy()
}
}
function getMode(format: string) {
switch (format) {
@ -131,17 +112,14 @@ function getMode(format: string) {
return 0
case 'pdf':
return 1
case 'png':
return 2
case 'jpg':
case 'jpeg':
return 3
case 'webp':
return 4
default:
throw new Error(`Unsupported output format: ${format}`)
}
}
/**
 * Parse a base-10 integer from an option value.
 *
 * @param string raw option value
 * @returns the parsed integer
 * @throws when no integer can be parsed from the value
 */
function validateInt(string: string) {
    const parsed = Number.parseInt(string, 10)

    if (!Number.isNaN(parsed)) {
        return parsed
    }

    throw new Error(`Invalid number value: ${string}`)
}

10
src/read-stream.ts Normal file
View File

@ -0,0 +1,10 @@
/**
 * Collect everything a readable stream emits into a single buffer.
 *
 * String chunks (emitted by object-mode streams or after `setEncoding()`)
 * are converted with `Buffer.from`, i.e. encoded as UTF-8. Without this,
 * `Buffer.concat` would throw inside the `end` listener, outside of the
 * returned promise.
 *
 * @param stream the stream to drain
 * @returns a promise for the concatenated data; it rejects when the stream
 *   emits an `error` event
 */
export function readStream(stream: NodeJS.ReadableStream) {
    const chunks: Buffer[] = []

    return new Promise<Buffer>((resolve, reject) =>
        stream
            .on('data', (chunk: Buffer | string) =>
                chunks.push(
                    typeof chunk === 'string' ? Buffer.from(chunk) : chunk,
                ),
            )
            .on('error', (error) => reject(error))
            .on('end', () => resolve(Buffer.concat(chunks))),
    )
}