Comments (30)
@raliste that sounds smart, I will give it a try. Thanks !
Update: thanks a lot, I used what you suggested with goroutines and I am down to 2.3s for 15 pages conversion at 200 DPI. Awesome.
from go-fitz.
@xiaoxfan sure
here is the code:
// ToPNGs renders every page of the PDF held in input to a PNG image and
// returns the encoded pages in page order.
//
// Pages are rendered concurrently: the page indices are partitioned by
// createChunks(numPage, concurrency()) and each chunk is rendered by its own
// goroutine, which opens its own fitz.Document from input so that no document
// handle is shared between goroutines.
//
// An error is returned only when the document cannot be opened or when
// assertNumPage rejects the page count; in both cases the result is nil.
// A chunk that fails stops at its first failing page, and the pages it did
// not render are left nil in the returned slice.
func ToPNGs(input []byte, opts *Options) ([][]byte, error) {
	doc, err := fitz.NewFromMemory(input)
	if err != nil {
		return nil, errors.WithStack(err)
	}
	// Deferred only after the error check so Close never runs on a document
	// that failed to open.
	defer doc.Close()

	numPage := doc.NumPage()
	if err = assertNumPage(numPage); err != nil {
		return nil, errors.WithStack(err)
	}
	// Allocate the result only once the page count has been validated.
	pngs := make([][]byte, numPage)

	// // SINGLE-THREADED
	// for n := 0; n < numPage; n++ {
	// var png []byte
	// // 72: 200ms, 144: 400ms, 200: 600ms, etc.
	// png, err = doc.ImagePNG(n, opts.dpi())
	// if err != nil {
	// if logger != nil {
	// // handle single page error here
	// }
	// continue
	// }
	// pngs[n] = png
	// }
	// END SINGLE-THREADED

	// MULTI-THREADED
	// render converts the pages of one chunk using a private document handle.
	// The handle is closed on every exit path, including errors.
	render := func(chunk []int) error {
		pageDoc, err := fitz.NewFromMemory(input)
		if err != nil {
			return errors.WithStack(err)
		}
		defer pageDoc.Close()
		for _, index := range chunk {
			buf, err := pageDoc.ImagePNG(index, opts.dpi())
			if err != nil {
				return errors.WithStack(err)
			}
			// Each index belongs to exactly one chunk, so no two goroutines
			// write the same element.
			pngs[index] = buf
		}
		return nil
	}

	chunks := createChunks(numPage, concurrency())
	// Buffered with one slot per chunk so no worker blocks on its report.
	ch := make(chan error, len(chunks))
	for _, chunk := range chunks {
		go func(chunk []int) {
			ch <- render(chunk)
		}(chunk)
	}
	// Wait for exactly one report per chunk.
	for range chunks {
		if err := <-ch; err != nil {
			if logger != nil {
				// handle single page errors here
			}
		}
	}
	// END MULTI-THREADED
	return pngs, nil
}
It contains both single-threaded and multi-threaded versions that are separated with comments like // MULTI-THREADED
In this sample single-threaded mode is disabled
It takes a lot of memory but it is faster
from go-fitz.
@PuKoren
Thanks for your sharing. It's very helpful.
from go-fitz.
@PuKoren Hello, I'm so sorry to bother you again.
Below is my code. Some PDFs convert fine and faster, but others trigger a segmentation violation error, and the failures do not appear to be random.
I am very confused. Is there any way to solve this problem? Thanks.
I use centos7
// Pdf2Images1 renders every page of the PDF in src to PNG at the given dpi,
// starting one goroutine per page, and returns the encoded pages in page
// order.
//
// A non-positive dpi is replaced by defaultDPI. When pageLimit is positive
// and the document has more pages than pageLimit, PageSizeErr is returned.
// Per-page failures are only logged: the matching entry of the result stays
// nil and the returned error is still nil.
func Pdf2Images1(src []byte, dpi float64, pageLimit int) ([][]byte, error) {
// Fall back to the default resolution when no usable DPI was supplied.
if dpi <= 0 {
dpi = defaultDPI
}
// This outer document is used only to query the page count.
doc, err := fitz.NewFromMemory(src)
if err != nil {
return nil, err
}
defer doc.Close()
// A pageLimit of zero or less disables the limit.
if pageLimit > 0 && doc.NumPage() > pageLimit {
return nil, PageSizeErr
}
// One result slot per page; each goroutine writes only its own slot.
ret := make([][]byte, doc.NumPage())
wg := new(sync.WaitGroup)
wg.Add(doc.NumPage())
// NOTE(review): one goroutine is started per page with no upper bound.
for n := 0; n < doc.NumPage(); n++ {
go func(n int) {
defer wg.Done()
// Each goroutine opens its own Document from the shared src bytes; this
// doc shadows the outer one.
doc, err := fitz.NewFromMemory(src)
if err != nil {
log.Println(err)
return
}
defer doc.Close()
ret[n], err = doc.ImagePNG(n, dpi)
if err != nil {
log.Println(err)
return
}
}(n)
}
// Block until every page goroutine has finished.
wg.Wait()
return ret, nil
}
panic log
fatal error: unexpected signal during runtime execution
fatal error: unexpected signal during runtime execution
[signal SIGSEGV: segmentation violation code=0x1 addr=0x10 pc=0x93ba62]
runtime stack:
runtime.throw(0xc408b8, 0x2a)
/usr/local/go/src/runtime/panic.go:1117 +0x72
runtime.sigpanic()
/usr/local/go/src/runtime/signal_unix.go:718 +0x2e5
goroutine 30 [syscall]:
runtime.cgocall(0x8f7d00, 0xc0002e7610, 0xc0000241b0)
/usr/local/go/src/runtime/cgocall.go:154 +0x5b fp=0xc0002e75e0 sp=0xc0002e75a8 pc=0x4328db
github.com/gen2brain/go-fitz._Cfunc_fz_run_page(0x7f6cb0000a10, 0x7f6cb0029ff0, 0x7f6cb002a2c0, 0xc0000241b0, 0x0)
_cgo_gotypes.go:1383 +0x45 fp=0xc0002e7610 sp=0xc0002e75e0 pc=0x8e1d45
github.com/gen2brain/go-fitz.(*Document).ImagePNG.func10(0xc00018e000, 0x7f6cb0029ff0, 0x7f6cb002a2c0, 0xc0000241b0)
/home/xiaofan/workspace/go/pkg/mod/github.com/gen2brain/[email protected]/fitz.go:254 +0xc5 fp=0xc0002e7650 sp=0xc0002e7610 pc=0x8e3b85
github.com/gen2brain/go-fitz.(*Document).ImagePNG(0xc00018e000, 0x2, 0x4052000000000000, 0x0, 0x0, 0x0, 0x0, 0x0)
/home/xiaofan/workspace/go/pkg/mod/github.com/gen2brain/[email protected]/fitz.go:254 +0x32b fp=0xc0002e7710 sp=0xc0002e7650 pc=0x8e276b
split-pdf/util.Pdf2Images1.func1(0xc00031a030, 0xc000680000, 0x435042, 0x470000, 0xc00031a020, 0xc00038e000, 0x5, 0x5, 0x2)
/home/xiaofan/workspace/go/split-pdf/util/pdf_linux.go:64 +0x12e fp=0xc0002e7798 sp=0xc0002e7710 pc=0x8f5d0e
runtime.goexit()
/usr/local/go/src/runtime/asm_amd64.s:1371 +0x1 fp=0xc0002e77a0 sp=0xc0002e7798 pc=0x49af01
created by split-pdf/util.Pdf2Images1
/home/xiaofan/workspace/go/split-pdf/util/pdf_linux.go:56 +0x20e
goroutine 1 [IO wait]:
internal/poll.runtime_pollWait(0x7f6ccac6aeb8, 0x72, 0x0)
/usr/local/go/src/runtime/netpoll.go:222 +0x55
internal/poll.(*pollDesc).wait(0xc00038e198, 0x72, 0x0, 0x0, 0xc2bce2)
/usr/local/go/src/internal/poll/fd_poll_runtime.go:87 +0x45
internal/poll.(*pollDesc).waitRead(...)
/usr/local/go/src/internal/poll/fd_poll_runtime.go:92
internal/poll.(*FD).Accept(0xc00038e180, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0)
/usr/local/go/src/internal/poll/fd_unix.go:401 +0x212
net.(*netFD).accept(0xc00038e180, 0x773bbd96961e7401, 0x0, 0x0)
/usr/local/go/src/net/fd_unix.go:172 +0x45
net.(*TCPListener).accept(0xc000394180, 0x6081471f, 0xc00029fcc8, 0x4f3406)
/usr/local/go/src/net/tcpsock_posix.go:139 +0x32
net.(*TCPListener).Accept(0xc000394180, 0xc00029fd18, 0x18, 0xc000000180, 0x6d541b)
/usr/local/go/src/net/tcpsock.go:261 +0x65
net/http.(*Server).Serve(0xc0003d20e0, 0xcdf620, 0xc000394180, 0x0, 0x0)
/usr/local/go/src/net/http/server.go:2981 +0x285
net/http.(*Server).ListenAndServe(0xc0003d20e0, 0xc0003d20e0, 0xc00029fec8)
/usr/local/go/src/net/http/server.go:2910 +0xba
net/http.ListenAndServe(...)
/usr/local/go/src/net/http/server.go:3164
github.com/gin-gonic/gin.(*Engine).Run(0xc0003c6340, 0xc00029ff58, 0x1, 0x1, 0x0, 0x0)
/home/xiaofan/workspace/go/pkg/mod/github.com/gin-gonic/[email protected]/gin.go:336 +0x1ba
main.main()
/home/xiaofan/workspace/go/split-pdf/main.go:38 +0x3e5
goroutine 18 [semacquire]:
sync.runtime_Semacquire(0xc00031a038)
/usr/local/go/src/runtime/sema.go:56 +0x45
sync.(*WaitGroup).Wait(0xc00031a030)
/usr/local/go/src/sync/waitgroup.go:130 +0x65
split-pdf/util.Pdf2Images1(0xc000680000, 0x435042, 0x470000, 0x0, 0xffffffffffffffff, 0x0, 0x0, 0x0, 0x0, 0x0)
/home/xiaofan/workspace/go/split-pdf/util/pdf_linux.go:71 +0x247
main.SplitPDF(0xc000392200)
/home/xiaofan/workspace/go/split-pdf/main.go:97 +0x46d
github.com/gin-gonic/gin.(*Context).Next(...)
/home/xiaofan/workspace/go/pkg/mod/github.com/gin-gonic/[email protected]/context.go:165
github.com/gin-gonic/gin.LoggerWithConfig.func1(0xc000392200)
/home/xiaofan/workspace/go/pkg/mod/github.com/gin-gonic/[email protected]/logger.go:241 +0xf4
github.com/gin-gonic/gin.(*Context).Next(...)
/home/xiaofan/workspace/go/pkg/mod/github.com/gin-gonic/[email protected]/context.go:165
github.com/gin-gonic/gin.CustomRecoveryWithWriter.func1(0xc000392200)
/home/xiaofan/workspace/go/pkg/mod/github.com/gin-gonic/[email protected]/recovery.go:99 +0x7a
github.com/gin-gonic/gin.(*Context).Next(...)
/home/xiaofan/workspace/go/pkg/mod/github.com/gin-gonic/[email protected]/context.go:165
github.com/gin-gonic/gin.(*Engine).handleHTTPRequest(0xc0003c6340, 0xc000392200)
/home/xiaofan/workspace/go/pkg/mod/github.com/gin-gonic/[email protected]/gin.go:489 +0x2aa
github.com/gin-gonic/gin.(*Engine).ServeHTTP(0xc0003c6340, 0xcdf800, 0xc0003d21c0, 0xc000392100)
/home/xiaofan/workspace/go/pkg/mod/github.com/gin-gonic/[email protected]/gin.go:445 +0x15c
net/http.serverHandler.ServeHTTP(0xc0003d20e0, 0xcdf800, 0xc0003d21c0, 0xc000392100)
/usr/local/go/src/net/http/server.go:2887 +0xa3
net/http.(*conn).serve(0xc0003ac320, 0xce0aa0, 0xc00038c3c0)
/usr/local/go/src/net/http/server.go:1952 +0x8cd
created by net/http.(*Server).Serve
/usr/local/go/src/net/http/server.go:3013 +0x39b
goroutine 27 [IO wait]:
internal/poll.runtime_pollWait(0x7f6ccac6add0, 0x72, 0xffffffffffffffff)
/usr/local/go/src/runtime/netpoll.go:222 +0x55
internal/poll.(*pollDesc).wait(0xc00038e218, 0x72, 0x0, 0x1, 0xffffffffffffffff)
/usr/local/go/src/internal/poll/fd_poll_runtime.go:87 +0x45
internal/poll.(*pollDesc).waitRead(...)
/usr/local/go/src/internal/poll/fd_poll_runtime.go:92
internal/poll.(*FD).Read(0xc00038e200, 0xc000382df1, 0x1, 0x1, 0x0, 0x0, 0x0)
/usr/local/go/src/internal/poll/fd_unix.go:166 +0x1d5
net.(*netFD).Read(0xc00038e200, 0xc000382df1, 0x1, 0x1, 0x0, 0x0, 0x0)
/usr/local/go/src/net/fd_posix.go:55 +0x4f
net.(*conn).Read(0xc0003860f8, 0xc000382df1, 0x1, 0x1, 0x0, 0x0, 0x0)
/usr/local/go/src/net/net.go:183 +0x91
net/http.(*connReader).backgroundRead(0xc000382de0)
/usr/local/go/src/net/http/server.go:692 +0x58
created by net/http.(*connReader).startBackgroundRead
/usr/local/go/src/net/http/server.go:688 +0xd5
goroutine 28 [syscall]:
github.com/gen2brain/go-fitz._Cfunc_fz_run_page(0x7f6ca80008c0, 0x7f6ca8029480, 0x7f6ca8029750, 0xc00033e018, 0x0)
_cgo_gotypes.go:1383 +0x45
github.com/gen2brain/go-fitz.(*Document).ImagePNG.func10(0xc00000e030, 0x7f6ca8029480, 0x7f6ca8029750, 0xc00033e018)
/home/xiaofan/workspace/go/pkg/mod/github.com/gen2brain/[email protected]/fitz.go:254 +0xc5
github.com/gen2brain/go-fitz.(*Document).ImagePNG(0xc00000e030, 0x0, 0x4052000000000000, 0x0, 0x0, 0x0, 0x0, 0x0)
/home/xiaofan/workspace/go/pkg/mod/github.com/gen2brain/[email protected]/fitz.go:254 +0x32b
split-pdf/util.Pdf2Images1.func1(0xc00031a030, 0xc000680000, 0x435042, 0x470000, 0xc00031a020, 0xc00038e000, 0x5, 0x5, 0x0)
/home/xiaofan/workspace/go/split-pdf/util/pdf_linux.go:64 +0x12e
created by split-pdf/util.Pdf2Images1
/home/xiaofan/workspace/go/split-pdf/util/pdf_linux.go:56 +0x20e
goroutine 29 [syscall]:
github.com/gen2brain/go-fitz._Cfunc_fz_run_page(0x7f6ca40008c0, 0x7f6ca40299a0, 0x7f6ca4029c70, 0xc0002bc2e8, 0x0)
_cgo_gotypes.go:1383 +0x45
github.com/gen2brain/go-fitz.(*Document).ImagePNG.func10(0xc000394000, 0x7f6ca40299a0, 0x7f6ca4029c70, 0xc0002bc2e8)
/home/xiaofan/workspace/go/pkg/mod/github.com/gen2brain/[email protected]/fitz.go:254 +0xc5
github.com/gen2brain/go-fitz.(*Document).ImagePNG(0xc000394000, 0x1, 0x4052000000000000, 0x0, 0x0, 0x0, 0x0, 0x0)
/home/xiaofan/workspace/go/pkg/mod/github.com/gen2brain/[email protected]/fitz.go:254 +0x32b
split-pdf/util.Pdf2Images1.func1(0xc00031a030, 0xc000680000, 0x435042, 0x470000, 0xc00031a020, 0xc00038e000, 0x5, 0x5, 0x1)
/home/xiaofan/workspace/go/split-pdf/util/pdf_linux.go:64 +0x12e
created by split-pdf/util.Pdf2Images1
/home/xiaofan/workspace/go/split-pdf/util/pdf_linux.go:56 +0x20e
goroutine 31 [syscall]:
github.com/gen2brain/go-fitz._Cfunc_fz_run_page(0x7f6cb4000a10, 0x7f6cb402a510, 0x7f6cb402d330, 0xc000590018, 0x0)
_cgo_gotypes.go:1383 +0x45
github.com/gen2brain/go-fitz.(*Document).ImagePNG.func10(0xc00058e000, 0x7f6cb402a510, 0x7f6cb402d330, 0xc000590018)
/home/xiaofan/workspace/go/pkg/mod/github.com/gen2brain/[email protected]/fitz.go:254 +0xc5
github.com/gen2brain/go-fitz.(*Document).ImagePNG(0xc00058e000, 0x3, 0x4052000000000000, 0x0, 0x0, 0x0, 0x0, 0x0)
/home/xiaofan/workspace/go/pkg/mod/github.com/gen2brain/[email protected]/fitz.go:254 +0x32b
split-pdf/util.Pdf2Images1.func1(0xc00031a030, 0xc000680000, 0x435042, 0x470000, 0xc00031a020, 0xc00038e000, 0x5, 0x5, 0x3)
/home/xiaofan/workspace/go/split-pdf/util/pdf_linux.go:64 +0x12e
created by split-pdf/util.Pdf2Images1
/home/xiaofan/workspace/go/split-pdf/util/pdf_linux.go:56 +0x20e
goroutine 32 [syscall]:
github.com/gen2brain/go-fitz._Cfunc_fz_run_page(0x7f6cc4029460, 0x7f6cc4053610, 0x7f6cc4055f10, 0xc000390048, 0x0)
_cgo_gotypes.go:1383 +0x45 fp=0xc0002e8610 sp=0xc0002e85e0 pc=0x8e1d45
github.com/gen2brain/go-fitz.(*Document).ImagePNG.func10(0xc0003160c0, 0x7f6cc4053610, 0x7f6cc4055f10, 0xc000390048)
/home/xiaofan/workspace/go/pkg/mod/github.com/gen2brain/[email protected]/fitz.go:254 +0xc5 fp=0xc0002e8650 sp=0xc0002e8610 pc=0x8e3b85
github.com/gen2brain/go-fitz.(*Document).ImagePNG(0xc0003160c0, 0x4, 0x4052000000000000, 0x0, 0x0, 0x0, 0x0, 0x0)
/home/xiaofan/workspace/go/pkg/mod/github.com/gen2brain/[email protected]/fitz.go:254 +0x32b fp=0xc0002e8710 sp=0xc0002e8650 pc=0x8e276b
split-pdf/util.Pdf2Images1.func1(0xc00031a030, 0xc000680000, 0x435042, 0x470000, 0xc00031a020, 0xc00038e000, 0x5, 0x5, 0x4)
/home/xiaofan/workspace/go/split-pdf/util/pdf_linux.go:64 +0x12e fp=0xc0002e8798 sp=0xc0002e8710 pc=0x8f5d0e
created by split-pdf/util.Pdf2Images1
/home/xiaofan/workspace/go/split-pdf/util/pdf_linux.go:56 +0x20e
[signal SIGSEGV: segmentation violation code=0x1 addr=0x10 pc=0x93ba62]
runtime stack:
runtime.throw(0xc408b8, 0x2a)
/usr/local/go/src/runtime/panic.go:1117 +0x72
runtime.sigpanic()
/usr/local/go/src/runtime/signal_unix.go:718 +0x2e5
from go-fitz.
Hello @xiaoxfan
I faced exactly the same problem. did you manage to find some solution?
from go-fitz.
Do you have any errors? Or example?
from go-fitz.
It seems I am facing the same problem. I just took the code from the example in the README and added a goroutine inside the loop. I adapted some of the code and am still not able to make it work. See my adapted code below.
package main
import (
"sync"
"fmt"
"image/jpeg"
"image"
"log"
"os"
"path/filepath"
"github.com/gen2brain/go-fitz"
)
var tmpDir = "./output"
// createOutputFolder makes sure tmpDir exists and is empty: a previously
// existing folder is removed together with its contents and a new one is
// created. The program is terminated through log.Fatalf when the folder
// cannot be removed or created, because no image can be written without it.
func createOutputFolder() {
	if _, err := os.Stat(tmpDir); !os.IsNotExist(err) {
		fmt.Println("Deleting existing folder")
		if err := os.RemoveAll(tmpDir); err != nil {
			log.Fatalf("Error while deleting folder %q: %v", tmpDir, err)
		}
	}
	fmt.Println("Creating folder")
	if err := os.Mkdir(tmpDir, os.ModePerm); err != nil {
		log.Fatalf("Error while creating folder %q: %v", tmpDir, err)
	}
}
// extractImage renders page number of doc and returns it as an image.Image.
// The program is terminated through log.Fatalf when rendering fails.
//
// NOTE(review): doc is received by value, so each call operates on a copy of
// the caller's fitz.Document struct. The signature is kept unchanged for
// existing callers.
func extractImage(doc fitz.Document, number int) image.Image {
	img, err := doc.Image(number)
	if err != nil {
		// Fatalf with an explicit separator: log.Fatal would print the
		// message and the error glued together.
		log.Fatalf("Error while extracting image: %v", err)
	}
	return img
}
// writeToFile encodes img as a JPEG named "testNNN.jpg" (NNN being number,
// zero-padded to three digits) inside tmpDir and prints the file name.
// The program is terminated through log.Fatalf when the file cannot be
// created, encoded or closed.
func writeToFile(img image.Image, number int) {
	name := fmt.Sprintf("test%03d.jpg", number)
	fmt.Println(name)

	f, err := os.Create(filepath.Join(tmpDir, name))
	if err != nil {
		log.Fatalf("Error while creating file: %v", err)
	}

	// Quality 1 is the value the original literal used; it is the lowest
	// setting accepted by the encoder.
	if err := jpeg.Encode(f, img, &jpeg.Options{Quality: 1}); err != nil {
		// log.Fatalf exits without running defers, so close explicitly.
		f.Close()
		log.Fatalf("Error while saving image: %v", err)
	}
	// A failed Close can mean the data never reached the disk.
	if err := f.Close(); err != nil {
		log.Fatalf("Error while closing file: %v", err)
	}
}
// generate renders page number of doc, writes it to disk as a JPEG and calls
// wg.Done() when it returns.
//
// NOTE(review): wg is received by value, so the deferred wg.Done() acts on a
// copy of the caller's sync.WaitGroup and does not decrement the caller's
// counter; a WaitGroup must not be copied after first use. doc is likewise a
// copy of the caller's fitz.Document.
func generate(doc fitz.Document, number int, wg sync.WaitGroup) {
defer wg.Done()
fmt.Println("Generating image from page", number)
img := extractImage(doc, number)
writeToFile(img, number)
}
// main opens 2pages.pdf, recreates the output folder, starts one goroutine
// per page to write that page as a JPEG, and then waits on the WaitGroup.
func main() {
doc, err := fitz.New("2pages.pdf")
if err != nil {
panic(err)
}
defer doc.Close()
createOutputFolder()
var wg sync.WaitGroup
numPages := doc.NumPage()
// Register every page up front, before any goroutine is started.
wg.Add(numPages)
fmt.Println("===> Number of Pages", numPages)
for number := 0; number < numPages; number++ {
fmt.Println("=>", number)
// NOTE(review): *doc and wg are both passed by value, so every goroutine
// receives its own copy of the Document and of the WaitGroup. The
// wg.Done() calls made inside generate therefore do not affect this wg,
// and the wg.Wait() below is not released by them.
go generate(*doc, number, wg)
}
wg.Wait()
fmt.Println("===> Done")
}
from go-fitz.
Also, I was using a very small document with 2 pages and text only (something like 80kb).
from go-fitz.
Methods are now protected with a mutex, so there should be no issues, but I didn't test.
from go-fitz.
I'm still not able to use goroutines...
from go-fitz.
Hello,
I think I am experiencing a similar issue. While the code doesn't crash, it seems that my images are mixed together.
Here is a sample code I currently use for concurrency:
// input is my PDF as an array of bytes
doc, err := fitz.NewFromMemory(input)
if err != nil {
panic(err)
}
defer doc.Close()
// One slot per page; filled in by index once the results arrive.
pages := make([]string, doc.NumPage())
// ImageResult carries one rendered page (base64-encoded PNG, or "" when
// rendering or encoding failed) together with its page index.
type ImageResult struct {
base64img string
index int
}
// Buffered with one slot per page so no goroutine blocks on its send.
ch := make(chan ImageResult, doc.NumPage())
for n := 0; n < doc.NumPage(); n++ {
go func(index int) {
// NOTE(review): every goroutine calls Image on the same shared doc.
img, err := doc.Image(index)
if err != nil {
ch <- ImageResult{ "", index }
return
}
buf := new(bytes.Buffer)
err = png.Encode(buf, img)
if err != nil {
ch <- ImageResult{ "", index }
return
}
ch <- ImageResult{
base64img: base64.StdEncoding.EncodeToString(buf.Bytes()),
index: index,
}
}(n)
}
// Collect exactly one result per page; arrival order is arbitrary, so the
// index carried in each result decides where it is stored.
for n := 0; n < doc.NumPage(); n++ {
res := <- ch
pages[res.index] = res.base64img
}
Without the goroutine it works fine, so I guess my code is OK.
Meanwhile I will try to look at the source code of the module and try to find the cause of the issue. I really need the performance boost of the goroutine (4 sec instead of > 12 on the PDF conversion, there is a lot of images) 😄
from go-fitz.
I added a PR (#12) to fix my issue, I'm not sure if this also fixes their issue
from go-fitz.
I could test and it seems to be working right now! Thanks @PuKoren. I will try to improve performance since when I test with a 12MB file, it breaks up.
Here the code
package main
import (
"sync"
"fmt"
"image/jpeg"
"log"
"os"
"path/filepath"
"github.com/gen2brain/go-fitz"
"reflect"
"runtime"
)
var tmpDir = "./output"
// createOutputFolder makes sure tmpDir exists and is empty: a previously
// existing folder is removed together with its contents and a new one is
// created. The program is terminated through log.Fatalf when the folder
// cannot be removed or created, because no image can be written without it.
func createOutputFolder() {
	if _, err := os.Stat(tmpDir); !os.IsNotExist(err) {
		fmt.Println("Deleting existing folder")
		if err := os.RemoveAll(tmpDir); err != nil {
			log.Fatalf("Error while deleting folder %q: %v", tmpDir, err)
		}
	}
	fmt.Println("Creating folder")
	if err := os.Mkdir(tmpDir, os.ModePerm); err != nil {
		log.Fatalf("Error while creating folder %q: %v", tmpDir, err)
	}
}
// generate renders page number of doc and writes it as a JPEG named
// "testNNN.jpg" (NNN being number, zero-padded to three digits) inside
// tmpDir, printing progress messages before and after.
// The program is terminated through log.Fatalf when the page cannot be
// rendered or the file cannot be created, encoded or closed.
func generate(doc *fitz.Document, number int) {
	fmt.Println("Generating image from page", number)
	img, err := doc.Image(number)
	if err != nil {
		// Fatalf with an explicit separator: log.Fatal would print the
		// message and the error glued together.
		log.Fatalf("Error while extracting image: %v", err)
	}

	name := fmt.Sprintf("test%03d.jpg", number)
	f, err := os.Create(filepath.Join(tmpDir, name))
	if err != nil {
		log.Fatalf("Error while creating file: %v", err)
	}

	// Quality 1 is the value the original literal used; it is the lowest
	// setting accepted by the encoder.
	if err := jpeg.Encode(f, img, &jpeg.Options{Quality: 1}); err != nil {
		// log.Fatalf exits without running defers, so close explicitly.
		f.Close()
		log.Fatalf("Error while saving image: %v", err)
	}
	// A failed Close can mean the data never reached the disk.
	if err := f.Close(); err != nil {
		log.Fatalf("Error while closing file: %v", err)
	}
	fmt.Println("Write finished", number)
}
// main converts every page of 12MB.pdf to a JPEG in the output folder,
// rendering the pages in separate goroutines that all share one document
// handle, and returns once every page has been written.
func main() {
	// The poster limits the run to two OS threads executing Go code.
	runtime.GOMAXPROCS(2)

	document, openErr := fitz.New("12MB.pdf")
	fmt.Println(reflect.TypeOf(document))
	if openErr != nil {
		panic(openErr)
	}
	defer document.Close()

	createOutputFolder()

	pageCount := document.NumPage()
	var pending sync.WaitGroup
	pending.Add(pageCount)
	fmt.Println("===> Number of Pages", pageCount)

	for page := 0; page < pageCount; page++ {
		fmt.Println("=>", page)
		// The page number is passed as an argument so each goroutine keeps
		// its own value; the document and WaitGroup are shared by reference.
		go func(page int) {
			fmt.Println("Called")
			defer pending.Done()
			generate(document, page)
		}(page)
	}

	pending.Wait()
	fmt.Println("===> Done")
}
from go-fitz.
@rondymesquita my PDF is around 16MB and 15 pages, but I don't experience any crash. What is your machine specs ?
from go-fitz.
I guess it is not related to the size itself but to the PDF content. If it has too many images and/or is a PDF made from scanned images, I sometimes get a crash. I'm running it inside a Docker container with limited memory and CPU. I have not tracked down the error yet.
from go-fitz.
I stopped using goroutines for now as it was crashing randomly despite the fix submitted previously. It happens once every 10 convert approx.
Still trying to figure out how to make it goroutine-friendly as I really need the multi-threading speed
from go-fitz.
I ended up taking a process fork approach. Each Image() gets a short-lived process. At first sight it looks cumbersome and CPU-intensive, but after many approaches, we found it's the most stable, secure and fast, especially when you RPC. Running thousands of Image() per day under 1 second.
from go-fitz.
@raliste thanks for the update. I'm not sure I follow entirely, do you open the PDF with a go-fitz instance once in a separate process for each page inside the PDF ?
from go-fitz.
@PuKoren yes. There's a -fork flag that subprocess the same program with the args to the file, page and width.
from go-fitz.
I investigated a little how MuPDF works and what is needed to have real concurrency support, and there are some problems. What is needed is described here https://mupdf.com/docs/coding-overview.html#multi-threading, excerpt from there:
The following simple rules should be followed to ensure that multi-threaded operations run smoothly:
"No simultaneous calls to MuPDF in different threads are allowed to use the same context."
Most of the time it is simplest to just use a different context for every thread; just create a new context at the same time as you create the thread. For more details see "Cloning the context" below.
"No simultaneous calls to MuPDF in different threads are allowed to use the same document."
Only one thread can be accessing a document at a time, but once display lists are created from that document, multiple threads at a time can operate on them.
The document can be used from several different threads as long as there are safeguards in place to prevent the usages being simultaneous.
"No simultaneous calls to MuPDF in different threads are allowed to use the same device."
Calling a device simultaneously from different threads will cause it to get confused and may crash. Calling a device from several different threads is perfectly acceptable as long as there are safeguards in place to prevent the calls being simultaneous.
So context must be cloned, that is not a problem, but other rules are difficult to make right, especially not accessing the same document from other threads, but we can work with display lists in other threads (not sure what to do with them). I also tried to pass lock/unlock functions when creating context, and that works but goroutine is not the same as thread, probably you experienced crashes when some routine is started on a different thread.
There is an example here https://mupdf.com/docs/examples/multi-threaded.c, but for now, I don't see an easy way to apply that to Go. Any help or pointers are welcomed.
from go-fitz.
@raliste that sounds smart, I will give it a try. Thanks !
Update: thanks a lot, I used what you suggested with goroutines and I am down to 2.3s for 15 pages conversion at 200 DPI. Awesome.
@PuKoren
Hi.
Could you share the solution? Thanks.
from go-fitz.
@xiaoxfan maybe you can try to rename the variable inside the goroutine doc, err := fitz.NewFromMemory(src)
to something like doc2, err := fitz.NewFromMemory(src)
because doc is already declared outside of it, I'm unsure of the behavior 🤔
(its weird that in the code I pasted I also use the same naming)
sorry I realized I missed a function in the previous code:
// createChunks splits the indices 0..n-1 into at most max contiguous chunks
// of near-equal size and returns them in ascending order.
//
// A non-positive n yields no chunks. A max below 1 is treated as 1, and a max
// above n is reduced to n, so no chunk is ever empty. The chunk size is
// rounded up, which guarantees that the number of chunks never exceeds max
// (for example n=5, max=2 gives [0 1 2] and [3 4]).
func createChunks(n, max int) [][]int {
	if n <= 0 {
		return nil
	}
	// Clamp max into [1, n]; this also rules out a division by zero below.
	if max < 1 {
		max = 1
	}
	if max > n {
		max = n
	}

	is := make([]int, n)
	for i := range is {
		is[i] = i
	}

	// Ceiling division: rounding down could produce more than max chunks.
	size := (n + max - 1) / max
	chunks := make([][]int, 0, max)
	for i := 0; i < n; i += size {
		j := i + size
		if j > n {
			j = n
		}
		chunks = append(chunks, is[i:j])
	}
	return chunks
}
// concurrency returns the number of workers to use. It reads the N_WORKERS
// environment variable and falls back to 2 when the variable is unset, is
// not an integer, or is smaller than 1 (a worker count of zero or below
// cannot be used to split work).
func concurrency() int {
	const defaultWorkers = 2

	// Atoi fails on the empty string, so an unset variable takes the
	// fallback path as well.
	n, err := strconv.Atoi(os.Getenv("N_WORKERS"))
	if err != nil || n < 1 {
		return defaultWorkers
	}
	return n
}
Did you see a pattern in the files that are non-working vs the ones that are ok? Like the number of pages, the document size, etc.
from go-fitz.
@PuKoren Thank you for your reply,
this problem still exists, even if I use the built-in func 'copy' to copy the input byte slice for each goroutine. In most cases it works fine, but a few specific PDFs cause a crash. That makes me very confused.
// Start one goroutine per page of doc1; each goroutine renders page n into
// ret[n] and logs (without propagating) any error.
for n := 0; n < doc1.NumPage(); n++ {
go func(n int) {
defer wg.Done()
// Give this goroutine a private copy of the PDF bytes so that its Document
// is not built on the slice shared with the other goroutines.
src1 := make([]byte,len(src))
copy(src1,src)
doc, err := fitz.NewFromMemory(src1)
if err != nil {
log.Println(err)
return
}
defer doc.Close()
ret[n], err = doc.ImagePNG(n, dpi)
if err != nil {
log.Println(err)
return
}
}(n)
}
from go-fitz.
@xiaoxfan are those PDF working when processed without goroutines with a single gofitz instance?
from go-fitz.
@xiaoxfan are those PDF working when processed without goroutines with a single gofitz instance?
Yes
from go-fitz.
@xiaoxfan If you use the same code I pasted here you get the same issue?
from go-fitz.
@xiaoxfan If you use the same code I pasted here you get the same issue?
Yes, only a few specific pdf files (5 pages,no obvious difference from the others) will not work, the rest work good.
update: MuPDF has a multi-threaded example, but I don't know how to wrap it with cgo.
https://mupdf.com/docs/examples/multi-threaded.c
from go-fitz.
@xiaoxfan I think it would be hard to debug without the PDF files to try locally
however I don't have enough time now to do it, so I can't help you much
from go-fitz.
@xiaoxfan I think it would be hard to debug without the PDF files to try locally
however I don't have enough time now to do it, so I can't help you much
@PuKoren
Thank you very much for your help, I will try to find other solutions.
from go-fitz.
Nice discussion in PyMUPDF pymupdf/PyMuPDF#97, but also without the solution.
from go-fitz.
Related Issues (20)
- not enough data to determine image format HOT 3
- Go build error: undefined: fitz.New HOT 10
- github.com/gen2brain/go-fitz cc1.exe: sorry, unimplemented: 64-bit mode not compiled in HOT 1
- Error: ./PDFToImg.go:21:19: undefined: fitz.New HOT 3
- error: cannot find builtin CJK font HOT 3
- can build in mips arch? HOT 3
- Docker golang:alpine "cannot find -lmupdf_linux_arm64_musl: No such file or directory" HOT 1
- go mod vendor local vendor dir not include libs dir and include dir HOT 2
- system crash when converting page to image HOT 9
- error: cannot find builtin CJK font HOT 2
- outline.page.page undefined (type _Ctype_int has no field or method page) compiler (MissingFieldOrMethod) HOT 6
- libmupdf_linux_amd64.a error HOT 8
- Implement Text Extraction in PyMuPdf Fitz Layout Mode HOT 4
- how to build mupdf static library in window platform HOT 10
- cannot find builtin CJK font HOT 1
- doc.HTML() can it support <table> <tr> <td>? HOT 1
- linux compilation error HOT 3
- linux ubuntu build fail HOT 5
- Makes discord/lilliput segfault HOT 2
- fitz is empty HOT 2
Recommend Projects
-
React
A declarative, efficient, and flexible JavaScript library for building user interfaces.
-
Vue.js
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
-
Typescript
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
-
TensorFlow
An Open Source Machine Learning Framework for Everyone
-
Django
The Web framework for perfectionists with deadlines.
-
Laravel
A PHP framework for web artisans
-
D3
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
-
Recommend Topics
-
javascript
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
-
web
Some thing interesting about web. New door for the world.
-
server
A server is a program made to process requests and deliver data to clients.
-
Machine learning
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
-
Visualization
Some thing interesting about visualization, use data art
-
Game
Some thing interesting about game, make everyone happy.
Recommend Org
-
Facebook
We are working to build community through open source technology. NB: members must have two-factor auth.
-
Microsoft
Open source projects and samples from Microsoft.
-
Google
Google ❤️ Open Source for everyone.
-
Alibaba
Alibaba Open Source for everyone
-
D3
Data-Driven Documents codes.
-
Tencent
China tencent open source team.
from go-fitz.