1 Star 0 Fork 1

gzlwz/golang-pdfcpu

forked from Deeao/golang-pdfcpu 
加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
export.go 16.96 KB
一键复制 编辑 原始数据 按行查看 历史
liuweizhi 提交于 2024-12-10 15:13 . 复制黏贴pdfcpu v0.81版本
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778
/*
Copyright 2023 The pdfcpu Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package form
import (
"encoding/json"
"io"
"path/filepath"
"strings"
"time"
"github.com/pdfcpu/pdfcpu/pkg/pdfcpu/model"
"github.com/pdfcpu/pdfcpu/pkg/pdfcpu/primitives"
"github.com/pdfcpu/pdfcpu/pkg/pdfcpu/types"
"github.com/pkg/errors"
)
// Header represents form meta data.
// It is serialized as the "header" object of an exported FormGroup.
type Header struct {
// Source is the base name of the file the form data originates from.
Source string `json:"source"`
// Version identifies the exporting pdfcpu release ("pdfcpu " followed by the version string).
Version string `json:"version"`
// Creation is the export timestamp formatted as "2006-01-02 15:04:05 MST".
Creation string `json:"creation"`
// ID is omitted from JSON when empty; header() in this file initializes it to an empty slice.
ID []string `json:"id,omitempty"`
// Title, Author, Creator, Producer, Subject and Keywords are copied
// from the equally named XRefTable properties by header().
Title string `json:"title,omitempty"`
Author string `json:"author,omitempty"`
Creator string `json:"creator,omitempty"`
Producer string `json:"producer,omitempty"`
Subject string `json:"subject,omitempty"`
Keywords string `json:"keywords,omitempty"`
}
// TextField represents a form text field.
type TextField struct {
// Pages lists the page numbers on which this field was encountered during export.
Pages []int `json:"pages"`
// ID and Name identify the field; lookups in this file match on either one.
ID string `json:"id"`
Name string `json:"name,omitempty"`
// Default holds the field's DV entry, if present.
Default string `json:"default,omitempty"`
// Value holds the field's V entry, if present.
Value string `json:"value"`
// Multiline is true if the field flags contain primitives.FieldMultiline.
Multiline bool `json:"multiline"`
// Locked is true if the field flags contain primitives.FieldReadOnly.
Locked bool `json:"locked"`
}
// DateField represents an Acroform date field.
type DateField struct {
// Pages lists the page numbers on which this field was encountered during export.
Pages []int `json:"pages"`
// ID and Name identify the field; lookups in this file match on either one.
ID string `json:"id"`
Name string `json:"name,omitempty"`
// Format is the Ext value of the primitives.DateFormat detected for this field.
Format string `json:"format"`
// Default holds the field's DV entry, if present.
Default string `json:"default,omitempty"`
// Value holds the field's V entry, if present.
Value string `json:"value"`
// Locked is true if the field flags contain primitives.FieldReadOnly.
Locked bool `json:"locked"`
}
// CheckBox represents a form checkbox.
type CheckBox struct {
// Pages lists the page numbers on which this field was encountered during export.
Pages []int `json:"pages"`
// ID and Name identify the field; lookups in this file match on either one.
ID string `json:"id"`
Name string `json:"name,omitempty"`
// Default is true if the field's DV entry is present and differs from "Off".
Default bool `json:"default"`
// Value is true if the field's V entry is present and differs from "Off".
Value bool `json:"value"`
// Locked is true if the field flags contain primitives.FieldReadOnly.
Locked bool `json:"locked"`
}
// RadioButtonGroup represents a form radio button group.
type RadioButtonGroup struct {
// Pages lists the page numbers on which this field was encountered during export.
Pages []int `json:"pages"`
// ID and Name identify the field; lookups in this file match on either one.
ID string `json:"id"`
Name string `json:"name,omitempty"`
// Options holds the decoded names found in the kids' AP/N dicts, excluding "Off".
Options []string `json:"options"`
// Default holds the decoded DV name, if present.
Default string `json:"default,omitempty"`
// Value holds the decoded V name; it stays empty if V is missing or "Off".
Value string `json:"value"`
// Locked is true if the field flags contain primitives.FieldReadOnly.
Locked bool `json:"locked"`
}
// ComboBox represents a form combobox.
type ComboBox struct {
// Pages lists the page numbers on which this field was encountered during export.
Pages []int `json:"pages"`
// ID and Name identify the field; lookups in this file match on either one.
ID string `json:"id"`
Name string `json:"name,omitempty"`
// NOTE(review): Editable is not populated by the export code in this file - confirm where it is set.
Editable bool `json:"editable"`
// Options holds the choices returned by parseOptions; export fails if there are none.
Options []string `json:"options"`
// Default holds the whitespace-trimmed DV string literal, if present.
Default string `json:"default,omitempty"`
// Value holds the whitespace-trimmed V string literal, if present.
Value string `json:"value"`
// Locked is true if the field flags contain primitives.FieldReadOnly.
Locked bool `json:"locked"`
}
// ListBox represents a form listbox.
type ListBox struct {
// Pages lists the page numbers on which this field was encountered during export.
Pages []int `json:"pages"`
// ID and Name identify the field; lookups in this file match on either one.
ID string `json:"id"`
Name string `json:"name,omitempty"`
// Multi is true if the field flags contain primitives.FieldMultiselect.
Multi bool `json:"multi"`
// Options holds the choices returned by parseOptions; export fails if there are none.
Options []string `json:"options"`
// Defaults holds the DV entry: a single trimmed string for single-select
// boxes, a string literal array for multi-select boxes.
Defaults []string `json:"defaults,omitempty"`
// Values holds the V entry, using the same single/multi convention as Defaults.
Values []string `json:"values,omitempty"`
// Locked is true if the field flags contain primitives.FieldReadOnly.
Locked bool `json:"locked"`
}
// Page is a container for page imageboxes.
// It is referenced by Form.Pages; the export code in this file does not populate it.
type Page struct {
// ImageBoxes is serialized under the JSON key "image" and omitted when empty.
ImageBoxes []*primitives.ImageBox `json:"image,omitempty"`
}
// Form represents a PDF form (aka. Acroform).
// Each slice collects the fields of one kind; empty slices are omitted from JSON.
type Form struct {
TextFields []*TextField `json:"textfield,omitempty"`
DateFields []*DateField `json:"datefield,omitempty"`
CheckBoxes []*CheckBox `json:"checkbox,omitempty"`
RadioButtonGroups []*RadioButtonGroup `json:"radiobuttongroup,omitempty"`
ComboBoxes []*ComboBox `json:"combobox,omitempty"`
ListBoxes []*ListBox `json:"listbox,omitempty"`
// NOTE(review): the meaning of the map key is not visible in this file - presumably a page identifier; confirm.
Pages map[string]*Page `json:"pages,omitempty"`
}
// FormGroup represents a JSON struct containing a sequence of form instances.
// ExportForm in this file produces a group holding exactly one Form.
type FormGroup struct {
// Header carries the meta data describing the export.
Header Header `json:"header"`
// Forms holds the exported form instances.
Forms []Form `json:"forms"`
}
// textFieldValueAndLock returns the value and the locked flag of the first
// text field whose ID equals id or whose Name equals name.
// The last result reports whether such a field was found.
func (f Form) textFieldValueAndLock(id, name string) (string, bool, bool) {
	for i := range f.TextFields {
		field := f.TextFields[i]
		if field.ID != id && field.Name != name {
			continue
		}
		return field.Value, field.Locked, true
	}
	return "", false, false
}
// dateFieldValueAndLock returns the value and the locked flag of the first
// date field whose ID equals id or whose Name equals name.
// The last result reports whether such a field was found.
func (f Form) dateFieldValueAndLock(id, name string) (string, bool, bool) {
	for i := range f.DateFields {
		field := f.DateFields[i]
		if field.ID != id && field.Name != name {
			continue
		}
		return field.Value, field.Locked, true
	}
	return "", false, false
}
// checkBoxValueAndLock returns the checked state and the locked flag of the
// first checkbox whose ID equals id or whose Name equals name.
// The last result reports whether such a checkbox was found.
func (f Form) checkBoxValueAndLock(id, name string) (bool, bool, bool) {
	for i := range f.CheckBoxes {
		box := f.CheckBoxes[i]
		if box.ID != id && box.Name != name {
			continue
		}
		return box.Value, box.Locked, true
	}
	return false, false, false
}
// radioButtonGroupValueAndLock returns the selected value and the locked flag
// of the first radio button group whose ID equals id or whose Name equals name.
// The last result reports whether such a group was found.
func (f Form) radioButtonGroupValueAndLock(id, name string) (string, bool, bool) {
	for i := range f.RadioButtonGroups {
		group := f.RadioButtonGroups[i]
		if group.ID != id && group.Name != name {
			continue
		}
		return group.Value, group.Locked, true
	}
	return "", false, false
}
// comboBoxValueAndLock returns the value and the locked flag of the first
// combobox whose ID equals id or whose Name equals name.
// The last result reports whether such a combobox was found.
func (f Form) comboBoxValueAndLock(id, name string) (string, bool, bool) {
	for i := range f.ComboBoxes {
		box := f.ComboBoxes[i]
		if box.ID != id && box.Name != name {
			continue
		}
		return box.Value, box.Locked, true
	}
	return "", false, false
}
// listBoxValuesAndLock returns the values and the locked flag of the first
// listbox whose ID equals id or whose Name equals name.
// The last result reports whether such a listbox was found.
func (f Form) listBoxValuesAndLock(id, name string) ([]string, bool, bool) {
	for i := range f.ListBoxes {
		box := f.ListBoxes[i]
		if box.ID != id && box.Name != name {
			continue
		}
		return box.Values, box.Locked, true
	}
	return nil, false, false
}
// extractRadioButtonGroupOptions collects the option names of the radio button
// group represented by d.
//
// Every entry of d's "Kids" array is dereferenced to a dict. The page object
// number of the first kid carrying a "P" entry is remembered and any later kid
// whose "P" refers to a different page is skipped; kids without "P" are always
// processed. For each processed kid the keys of AP/N are decoded and every key
// other than "Off" is added to the result once.
//
// An error is returned if a kid cannot be dereferenced, lacks AP or AP/N,
// or carries a name that cannot be decoded.
func extractRadioButtonGroupOptions(xRefTable *model.XRefTable, d types.Dict) ([]string, error) {
	var opts []string

	pageObjNr := 0
	for _, o := range d.ArrayEntry("Kids") {
		kid, err := xRefTable.DereferenceDict(o)
		if err != nil {
			return nil, err
		}

		if indRef := kid.IndirectRefEntry("P"); indRef != nil {
			if pageObjNr == 0 {
				pageObjNr = indRef.ObjectNumber.Value()
			} else if pageObjNr != indRef.ObjectNumber.Value() {
				continue
			}
		}

		ap := kid.DictEntry("AP")
		if ap == nil {
			return nil, errors.New("corrupt form field: missing entry AP")
		}
		normal := ap.DictEntry("N")
		if normal == nil {
			return nil, errors.New("corrupt AP field: missing entry N")
		}

		for k := range normal {
			opt, err := types.DecodeName(k)
			if err != nil {
				return nil, err
			}
			if opt == "Off" {
				continue
			}
			// Skip options already collected from another kid. A plain
			// "continue" inside the search loop would only advance that loop.
			seen := false
			for _, known := range opts {
				if known == opt {
					seen = true
					break
				}
			}
			if !seen {
				opts = append(opts, opt)
			}
		}
	}

	return opts, nil
}
// extractRadioButtonGroup builds the RadioButtonGroup for the field dict d
// located on the given page.
//
// The decoded DV name becomes the default, the decoded V name becomes the
// value unless it is "Off", and the options are gathered via
// extractRadioButtonGroupOptions. Any decoding or extraction error is returned.
func extractRadioButtonGroup(xRefTable *model.XRefTable, page int, d types.Dict, id, name string, locked bool) (*RadioButtonGroup, error) {
	var defaultOpt, selectedOpt string

	if dv := d.NameEntry("DV"); dv != nil {
		decoded, err := types.DecodeName(*dv)
		if err != nil {
			return nil, err
		}
		defaultOpt = decoded
	}

	if v := d.NameEntry("V"); v != nil {
		decoded, err := types.DecodeName(*v)
		if err != nil {
			return nil, err
		}
		if decoded != "Off" {
			selectedOpt = decoded
		}
	}

	options, err := extractRadioButtonGroupOptions(xRefTable, d)
	if err != nil {
		return nil, err
	}

	return &RadioButtonGroup{
		Pages:   []int{page},
		ID:      id,
		Name:    name,
		Options: options,
		Default: defaultOpt,
		Value:   selectedOpt,
		Locked:  locked,
	}, nil
}
// extractCheckBox builds the CheckBox for the field dict d located on the given page.
//
// Default and Value are true if the DV respectively V entry is present and
// differs from "Off". An entry that is present but not a name is reported as
// an error instead of triggering a type assertion panic.
func extractCheckBox(page int, d types.Dict, id, name string, locked bool) (*CheckBox, error) {
	cb := &CheckBox{Pages: []int{page}, ID: id, Name: name, Locked: locked}

	if o, found := d.Find("DV"); found {
		n, isName := o.(types.Name)
		if !isName {
			return nil, errors.New("pdfcpu: corrupt form field: entry DV must be a name")
		}
		cb.Default = n != "Off"
	}

	if o, found := d.Find("V"); found {
		n, isName := o.(types.Name)
		if !isName {
			return nil, errors.New("pdfcpu: corrupt form field: entry V must be a name")
		}
		cb.Value = n != "Off"
	}

	return cb, nil
}
// extractComboBox builds the ComboBox for the field dict d located on the given page.
//
// The DV and V string literals, if present, are converted and trimmed of
// surrounding whitespace. The options come from parseOptions; a combobox
// without options is rejected with an error.
func extractComboBox(xRefTable *model.XRefTable, page int, d types.Dict, id, name string, locked bool) (*ComboBox, error) {
	box := &ComboBox{Pages: []int{page}, ID: id, Name: name, Locked: locked}

	// assign stores the trimmed string literal found under key in dst,
	// leaving dst untouched if the entry is missing.
	assign := func(key string, dst *string) error {
		sl := d.StringLiteralEntry(key)
		if sl == nil {
			return nil
		}
		s, err := types.StringLiteralToString(*sl)
		if err != nil {
			return err
		}
		*dst = strings.TrimSpace(s)
		return nil
	}

	if err := assign("DV", &box.Default); err != nil {
		return nil, err
	}
	if err := assign("V", &box.Value); err != nil {
		return nil, err
	}

	options, err := parseOptions(xRefTable, d)
	if err != nil {
		return nil, err
	}
	if len(options) == 0 {
		return nil, errors.New("pdfcpu: combobox missing Opts")
	}
	box.Options = options

	return box, nil
}
// extractDateFormat tries to detect the date format of the text field dict d.
//
// It first inspects the JavaScript string at AA/F/JS: if it contains
// AFDate_FormatEx(" the 10 bytes following that marker are taken as the
// format and resolved via primitives.DateFormatForFmtExt. If that yields
// nothing, the DV and then the V entry are resolved via
// primitives.DateFormatForDate.
//
// It returns nil, nil if no date format could be detected, which callers in
// this file treat as "not a date field". Errors stem from string conversion only.
func extractDateFormat(d types.Dict) (*primitives.DateFormat, error) {
	const (
		marker = "AFDate_FormatEx(\""
		fmtLen = 10
	)

	if aa := d.DictEntry("AA"); len(aa) > 0 {
		if f := aa.DictEntry("F"); len(f) > 0 {
			if sl := f.StringLiteralEntry("JS"); sl != nil {
				s, err := types.StringLiteralToString(*sl)
				if err != nil {
					return nil, err
				}
				if i := strings.Index(s, marker); i >= 0 {
					from := i + len(marker)
					// Guard against truncated scripts: slicing past the end
					// of s would panic. If too short, s is passed on unchanged,
					// exactly as if the marker had not been found.
					if from+fmtLen <= len(s) {
						s = s[from : from+fmtLen]
					}
				}
				if df, err := primitives.DateFormatForFmtExt(s); err == nil {
					return df, nil
				}
			}
		}
	}

	// Fall back to deriving the format from the default value, then the value.
	for _, key := range []string{"DV", "V"} {
		o, found := d.Find(key)
		if !found {
			continue
		}
		sl, err := types.StringOrHexLiteral(o)
		if err != nil {
			return nil, err
		}
		s := ""
		if sl != nil {
			s = *sl
		}
		if df, err := primitives.DateFormatForDate(s); err == nil {
			return df, nil
		}
	}

	return nil, nil
}
// extractDateField builds the DateField for the field dict d located on the given page.
//
// df supplies the format (its Ext value) and must not be nil. The DV and V
// entries, if present, are read as string or hex literals; a nil literal
// results in an empty string.
func extractDateField(page int, d types.Dict, id, name string, df *primitives.DateFormat, locked bool) (*DateField, error) {
	field := &DateField{Pages: []int{page}, ID: id, Name: name, Format: df.Ext, Locked: locked}

	if obj, ok := d.Find("DV"); ok {
		text, err := types.StringOrHexLiteral(obj)
		if err != nil {
			return nil, err
		}
		if text == nil {
			field.Default = ""
		} else {
			field.Default = *text
		}
	}

	if obj, ok := d.Find("V"); ok {
		text, err := types.StringOrHexLiteral(obj)
		if err != nil {
			return nil, err
		}
		if text == nil {
			field.Value = ""
		} else {
			field.Value = *text
		}
	}

	return field, nil
}
// extractTextField builds the TextField for the field dict d located on the given page.
//
// ff holds the field flags and may be nil; the field is multiline if the
// flags contain primitives.FieldMultiline. The DV and V entries, if present,
// are read as string or hex literals; a nil literal results in an empty string.
func extractTextField(page int, d types.Dict, id, name string, ff *int, locked bool) (*TextField, error) {
	var multiline bool
	if ff != nil {
		multiline = uint(primitives.FieldFlags(*ff))&uint(primitives.FieldMultiline) != 0
	}

	field := &TextField{Pages: []int{page}, ID: id, Name: name, Multiline: multiline, Locked: locked}

	if obj, ok := d.Find("DV"); ok {
		text, err := types.StringOrHexLiteral(obj)
		if err != nil {
			return nil, err
		}
		if text == nil {
			field.Default = ""
		} else {
			field.Default = *text
		}
	}

	if obj, ok := d.Find("V"); ok {
		text, err := types.StringOrHexLiteral(obj)
		if err != nil {
			return nil, err
		}
		if text == nil {
			field.Value = ""
		} else {
			field.Value = *text
		}
	}

	return field, nil
}
// extractListBox builds the ListBox for the field dict d located on the given page.
//
// For multi-select boxes DV and V are parsed as string literal arrays. For
// single-select boxes they are read as single string literals, trimmed and
// wrapped into one-element slices. The options come from parseOptions; a
// listbox without options is rejected with an error.
func extractListBox(xRefTable *model.XRefTable, page int, d types.Dict, id, name string, locked, multi bool) (*ListBox, error) {
	box := &ListBox{Pages: []int{page}, ID: id, Name: name, Locked: locked, Multi: multi}

	if multi {
		defaults, err := parseStringLiteralArray(xRefTable, d, "DV")
		if err != nil {
			return nil, err
		}
		box.Defaults = defaults

		values, err := parseStringLiteralArray(xRefTable, d, "V")
		if err != nil {
			return nil, err
		}
		box.Values = values
	} else {
		if lit := d.StringLiteralEntry("DV"); lit != nil {
			text, err := types.StringLiteralToString(*lit)
			if err != nil {
				return nil, err
			}
			box.Defaults = []string{strings.TrimSpace(text)}
		}
		if lit := d.StringLiteralEntry("V"); lit != nil {
			text, err := types.StringLiteralToString(*lit)
			if err != nil {
				return nil, err
			}
			box.Values = []string{strings.TrimSpace(text)}
		}
	}

	options, err := parseOptions(xRefTable, d)
	if err != nil {
		return nil, err
	}
	if len(options) == 0 {
		return nil, errors.New("pdfcpu: listbox missing Opts")
	}
	box.Options = options

	return box, nil
}
// header assembles the export meta data for the form originating from source.
// It records the base name of source, the pdfcpu version, the current time
// and the document properties held by xRefTable.
func header(xRefTable *model.XRefTable, source string) Header {
	return Header{
		Source:   filepath.Base(source),
		Version:  "pdfcpu " + model.VersionStr,
		Creation: time.Now().Format("2006-01-02 15:04:05 MST"),
		ID:       []string{},
		Title:    xRefTable.Title,
		Author:   xRefTable.Author,
		Creator:  xRefTable.Creator,
		Producer: xRefTable.Producer,
		Subject:  xRefTable.Subject,
		Keywords: xRefTable.Keywords,
	}
}
// fieldsForAnnots returns the form fields among the annotations of one page,
// keyed by field id.
//
// Each indirect reference in annots is checked with isField against fields.
// If the resulting field info carries no indirect reference of its own, the
// annotation's reference is used. An annotation with the same id as the one
// recorded right before it is ignored. Entries of annots that are not
// indirect references are skipped rather than causing a type assertion panic.
func fieldsForAnnots(xRefTable *model.XRefTable, annots, fields types.Array) (map[string]fieldInfo, error) {
	m := map[string]fieldInfo{}
	var prevID string

	for _, v := range annots {
		indRef, isRef := v.(types.IndirectRef)
		if !isRef {
			// isField needs an indirect reference to work with.
			continue
		}

		ok, fi, err := isField(xRefTable, indRef, fields)
		if err != nil {
			return nil, err
		}
		if !ok {
			continue
		}

		if fi.indRef == nil {
			fi.indRef = &indRef
		}
		if fi.id != prevID {
			m[fi.id] = *fi
			prevID = fi.id
		}
	}

	return m, nil
}
// exportBtn exports the button field d found on page i into form.
//
// A field with more than one entry in "Kids" is handled as a radio button
// group, anything else as a checkbox. If form already holds an element with
// the same id and name, only page i is appended to its pages. Otherwise the
// element is extracted, added to form and *ok is set to true.
func exportBtn(
	xRefTable *model.XRefTable,
	i int,
	form *Form,
	d types.Dict,
	id, name string,
	locked bool,
	ok *bool) error {

	if len(d.ArrayEntry("Kids")) <= 1 {
		for _, known := range form.CheckBoxes {
			if known.Name != name || known.ID != id {
				continue
			}
			known.Pages = append(known.Pages, i)
			return nil
		}

		box, err := extractCheckBox(i, d, id, name, locked)
		if err != nil {
			return err
		}
		form.CheckBoxes = append(form.CheckBoxes, box)
		*ok = true
		return nil
	}

	for _, known := range form.RadioButtonGroups {
		if known.ID != id || known.Name != name {
			continue
		}
		known.Pages = append(known.Pages, i)
		return nil
	}

	group, err := extractRadioButtonGroup(xRefTable, i, d, id, name, locked)
	if err != nil {
		return err
	}
	form.RadioButtonGroups = append(form.RadioButtonGroups, group)
	*ok = true
	return nil
}
// exportCh exports the choice field d found on page i into form.
//
// The "Ff" entry is mandatory. With primitives.FieldCombo set the field is
// handled as a combobox, otherwise as a listbox whose multi-select state
// follows primitives.FieldMultiselect. If form already holds an element with
// the same id and name, only page i is appended to its pages. Otherwise the
// element is extracted, added to form and *ok is set to true.
func exportCh(
	xRefTable *model.XRefTable,
	i int,
	form *Form,
	d types.Dict,
	id, name string,
	locked bool,
	ok *bool) error {

	flags := d.IntEntry("Ff")
	if flags == nil {
		return errors.New("pdfcpu: corrupt form field: missing entry Ff")
	}

	isCombo := primitives.FieldFlags(*flags)&primitives.FieldCombo > 0
	if !isCombo {
		for _, known := range form.ListBoxes {
			if known.Name != name || known.ID != id {
				continue
			}
			known.Pages = append(known.Pages, i)
			return nil
		}

		multi := primitives.FieldFlags(*flags)&primitives.FieldMultiselect > 0
		box, err := extractListBox(xRefTable, i, d, id, name, locked, multi)
		if err != nil {
			return err
		}
		form.ListBoxes = append(form.ListBoxes, box)
		*ok = true
		return nil
	}

	for _, known := range form.ComboBoxes {
		if known.Name != name || known.ID != id {
			continue
		}
		known.Pages = append(known.Pages, i)
		return nil
	}

	box, err := extractComboBox(xRefTable, i, d, id, name, locked)
	if err != nil {
		return err
	}
	form.ComboBoxes = append(form.ComboBoxes, box)
	*ok = true
	return nil
}
// exportTx exports the text field d found on page i into form.
//
// If extractDateFormat detects a date format the field is handled as a date
// field, otherwise as a plain text field. If form already holds an element
// with the same id and name, only page i is appended to its pages. Otherwise
// the element is extracted, added to form and *ok is set to true.
func exportTx(
	i int,
	form *Form,
	d types.Dict,
	id, name string,
	ff *int,
	locked bool,
	ok *bool) error {

	dateFormat, err := extractDateFormat(d)
	if err != nil {
		return err
	}

	if dateFormat == nil {
		for _, known := range form.TextFields {
			if known.Name != name || known.ID != id {
				continue
			}
			known.Pages = append(known.Pages, i)
			return nil
		}

		textField, err := extractTextField(i, d, id, name, ff, locked)
		if err != nil {
			return err
		}
		form.TextFields = append(form.TextFields, textField)
		*ok = true
		return nil
	}

	for _, known := range form.DateFields {
		if known.Name != name || known.ID != id {
			continue
		}
		known.Pages = append(known.Pages, i)
		return nil
	}

	dateField, err := extractDateField(i, d, id, name, dateFormat, locked)
	if err != nil {
		return err
	}
	form.DateFields = append(form.DateFields, dateField)
	*ok = true
	return nil
}
// exportPageFields exports all fields in m, which belong to page i, into form.
//
// Each field's dict is dereferenced; empty dicts are skipped. A field counts
// as locked if its "Ff" flags contain primitives.FieldReadOnly. The field type
// is taken from the field info or, failing that, from the dict's "FT" entry,
// which then is mandatory. "Btn", "Ch" and "Tx" fields are dispatched to their
// export function; other types are ignored. Since m is a map, fields are
// visited in no particular order.
func exportPageFields(xRefTable *model.XRefTable, i int, form *Form, m map[string]fieldInfo, ok *bool) error {
	for id, fi := range m {
		d, err := xRefTable.DereferenceDict(*fi.indRef)
		if err != nil {
			return err
		}
		if len(d) == 0 {
			continue
		}

		ff := d.IntEntry("Ff")
		locked := ff != nil && uint(primitives.FieldFlags(*ff))&uint(primitives.FieldReadOnly) > 0

		ft := fi.ft
		if ft == nil {
			if ft = d.NameEntry("FT"); ft == nil {
				return errors.New("pdfcpu: corrupt form field: missing entry FT")
			}
		}

		switch *ft {
		case "Btn":
			err = exportBtn(xRefTable, i, form, d, id, fi.name, locked, ok)
		case "Ch":
			err = exportCh(xRefTable, i, form, d, id, fi.name, locked, ok)
		case "Tx":
			err = exportTx(i, form, d, id, fi.name, ff, locked, ok)
		}
		if err != nil {
			return err
		}
	}

	return nil
}
// ExportForm extracts form data originating from source from xRefTable.
//
// It walks all pages, determines which annotations are form fields and
// collects them into a single Form wrapped in the returned FormGroup.
// The boolean result reports whether at least one field was exported.
func ExportForm(xRefTable *model.XRefTable, source string) (*FormGroup, bool, error) {
	formFields, err := fields(xRefTable)
	if err != nil {
		return nil, false, err
	}

	group := FormGroup{Header: header(xRefTable, source)}

	var (
		form     Form
		exported bool
	)

	for pageNr := 1; pageNr <= xRefTable.PageCount; pageNr++ {
		pageDict, _, _, err := xRefTable.PageDict(pageNr, false)
		if err != nil {
			return nil, false, err
		}

		annotsObj, hasAnnots := pageDict.Find("Annots")
		if !hasAnnots {
			continue
		}

		annots, err := xRefTable.DereferenceArray(annotsObj)
		if err != nil {
			return nil, false, err
		}

		pageFields, err := fieldsForAnnots(xRefTable, annots, formFields)
		if err != nil {
			return nil, false, err
		}

		if err := exportPageFields(xRefTable, pageNr, &form, pageFields, &exported); err != nil {
			return nil, false, err
		}
	}

	group.Forms = []Form{form}

	return &group, exported, nil
}
// ExportFormJSON extracts form data originating from source from xRefTable and writes a JSON representation to w.
//
// Nothing is written and false is returned if the export fails or yields no
// fields. Otherwise the tab-indented JSON is written to w and true is
// returned together with the error of the write, if any.
func ExportFormJSON(xRefTable *model.XRefTable, source string, w io.Writer) (bool, error) {
	group, exported, err := ExportForm(xRefTable, source)
	if err != nil {
		return false, err
	}
	if !exported {
		return false, nil
	}

	encoded, err := json.MarshalIndent(group, "", "\t")
	if err != nil {
		return false, err
	}

	if _, err = w.Write(encoded); err != nil {
		return exported, err
	}
	return exported, nil
}
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Go
1
https://gitee.com/gzlwz/golang-pdfcpu.git
git@gitee.com:gzlwz/golang-pdfcpu.git
gzlwz
golang-pdfcpu
golang-pdfcpu
v0.0.1

搜索帮助

0d507c66 1850385 C8b1a773 1850385