使用 Java 为 PDF 添加书签的方法（PDF 书签制作）

我经常下载一些 PDF 格式的电子书，其中不少好书下载下来却没有书签，读起来缺乏整体感。所以决定自己写一个小工具，把特定格式的文本解析成书签，然后保存到 PDF 文件中。
整体思路是：豆瓣、京东、当当、亚马逊等网站的图书介绍里通常可以复制出目录信息，以《HTTP 权威指南》为例：
目录的结构如：
示例如下：
第1章 http 概述 3
1.1 http——因特网的多媒体信使 4
1.2 web 客户端和服务器 4
1.3 资源 5
1.3.1 媒体类型 6
1.3.2 uri 7
1.3.3 url 7
1.3.4 urn 8
1.4 事务 9
1.4.1 方法 9
1.4.2 状态码 10
1.4.3 web 页面中可以包含多个对象 10
1.5 报文 11
1.6 连接 13

每一行的末尾都是页码，与标题之间用空格分隔；行首编号中“.”的个数决定书签的层级（例如“第1章”为第 0 层，“1.3”为第 1 层，“1.3.1”为第 2 层）。
处理之后，结果为：

主要的逻辑为：

代码如下：
package org.fra.pdf.bussiness;

import java.io.BufferedReader;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.HashMap;
import java.util.List;
import java.util.Stack;

import com.itextpdf.text.DocumentException;
import com.itextpdf.text.pdf.IntHashtable;
import com.itextpdf.text.pdf.PdfArray;
import com.itextpdf.text.pdf.PdfDictionary;
import com.itextpdf.text.pdf.PdfIndirectReference;
import com.itextpdf.text.pdf.PdfName;
import com.itextpdf.text.pdf.PdfNumber;
import com.itextpdf.text.pdf.PdfObject;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.PdfStamper;
import com.itextpdf.text.pdf.PdfString;
import com.itextpdf.text.pdf.SimpleBookmark;

public class addpdfoutlinefromtxt {
private stack<outlineinfo> parentoutlinestack = new stack<outlineinfo>();

    public void createpdf(string destpdf, string sourcepdf,
            bufferedreader bufread, int pattern) throws ioexception,
            documentexception {

        if (pattern != addbookmarkconstants.reserved_old_outline
                && pattern != addbookmarkconstants.reserved_none
                && pattern != addbookmarkconstants.reserved_first_outline)
            return;
        // 读入pdf文件
        pdfreader reader = new pdfreader(sourcepdf);

        list<hashmap<string, object>> outlines = new arraylist<hashmap<string, object>>();
        if (pattern == addbookmarkconstants.reserved_old_outline) {
            outlines.addall(simplebookmark.getbookmark(reader));
        } else if (pattern == addbookmarkconstants.reserved_first_outline) {
            addfirstoutlinereservedpdf(outlines, reader);
        }

        addbookmarks(bufread, outlines, null, 0);
        // 新建stamper
        pdfstamper stamper = new pdfstamper(reader, new fileoutputstream(
                destpdf));

        stamper.setoutlines(outlines);
        stamper.close();
    }

    private void addbookmarks(bufferedreader bufread,
            list<hashmap<string, object>> outlines,
            hashmap<string, object> preoutline, int prelevel)
            throws ioexception {
        string contentformatline = null;
        bufread.mark(1);
        if ((contentformatline = bufread.readline()) != null) {
            formattedbookmark bookmark = parseformmattedtext(contentformatline);

hashmap<string, object> map = parsebookmarktohashmap(bookmark);

            int level = bookmark.getlevel();
            // 如果n==m, 那么是同一层的，这个时候，就加到arraylist中,继续往下面读取
            if (level == prelevel) {
                outlines.add(map);
                addbookmarks(bufread, outlines, map, level);
            }
            // 如果n>m,那么可以肯定，该行是上一行的孩子，, new 一个kids的arraylist,并且加入到这个arraylist中
            else if (level > prelevel) {
                list<hashmap<string, object>> kids = new arraylist<hashmap<string, object>>();
                kids.add(map);
                preoutline.put("kids", kids);
                // 记录有孩子的outline信息
                parentoutlinestack.push(new outlineinfo(preoutline, outlines,
                        prelevel));
                addbookmarks(bufread, kids, map, level);
            }
            // 如果n<m , 那么就是说孩子增加完了，退回到上层，bufread倒退一行
            else if (level < prelevel) {
                bufread.reset();
                outlineinfo obj = parentoutlinestack.pop();
                addbookmarks(bufread, obj.getoutlines(), obj.getpreoutline(),
                        obj.getprelevel());
            }

}
}

    private hashmap<string, object> parsebookmarktohashmap(
            formattedbookmark bookmark) {
        hashmap<string, object> map = new hashmap<string, object>();
        map.put("title", bookmark.gettitle());
        map.put("action", "goto");
        map.put("page", bookmark.getpage() + " fit");
        return map;
    }

    private formattedbookmark parseformmattedtext(string contentformatline) {
        formattedbookmark bookmark = new formattedbookmark();
        string title = "";
        string destpage = "";

        // 当没有页码在字符串结尾的时候，一般就是书的名字，如果格式正确的话。
        int lastspaceindex = contentformatline.lastindexof(" ");
        if (lastspaceindex == -1) {
            title = contentformatline;
            destpage = "1";
        } else {
            title = contentformatline.substring(0, lastspaceindex);
            destpage = contentformatline.substring(lastspaceindex + 1);
        }

string[] titlesplit = title.split(" ");
int dotcount = titlesplit[0].split("\\.").length - 1;

        bookmark.setlevel(dotcount);
        bookmark.setpage(destpage);
        bookmark.settitle(title);
        return bookmark;
    }

    private void addfirstoutlinereservedpdf(
            list<hashmap<string, object>> outlines, pdfreader reader) {
        pdfdictionary catalog = reader.getcatalog();
        pdfobject obj = pdfreader.getpdfobjectrelease(catalog
                .get(pdfname.outlines));
        // 没有书签
        if (obj == null || !obj.isdictionary())
            return;
        pdfdictionary outlinesdictionary = (pdfdictionary) obj;
        // 得到第一个书签
        pdfdictionary firstoutline = (pdfdictionary) pdfreader
                .getpdfobjectrelease(outlinesdictionary.get(pdfname.first));

pdfstring titleobj = firstoutline.getasstring((pdfname.title));
string title = titleobj.tounicodestring();

pdfarray dest = firstoutline.getasarray(pdfname.dest);

        if (dest == null) {
            pdfdictionary action = (pdfdictionary) pdfreader
                    .getpdfobjectrelease(firstoutline.get(pdfname.a));
            if (action != null) {
                if (pdfname.goto.equals(pdfreader.getpdfobjectrelease(action
                        .get(pdfname.s)))) {
                    dest = (pdfarray) pdfreader.getpdfobjectrelease(action
                            .get(pdfname.d));
                }
            }
        }
        string deststr = parsedeststring(dest, reader);

        string[] decodestr = deststr.split(" ");
        int num = integer.valueof(decodestr[0]);
        hashmap<string, object> map = new hashmap<string, object>();
        map.put("title", title);
        map.put("action", "goto");
        map.put("page", num + " fit");

outlines.add(map);
}

    private string parsedeststring(pdfarray dest, pdfreader reader) {
        string deststr = "";
        if (dest.isstring()) {
            deststr = dest.tostring();
        } else if (dest.isname()) {
            deststr = pdfname.decodename(dest.tostring());
        } else if (dest.isarray()) {
            inthashtable pages = new inthashtable();
            int numpages = reader.getnumberofpages();
            for (int k = 1; k <= numpages; ++k) {
                pages.put(reader.getpageorigref(k).getnumber(), k);
                reader.releasepage(k);
            }

            deststr = makebookmarkparam((pdfarray) dest, pages);
        }
        return deststr;
    }

    private string makebookmarkparam(pdfarray dest, inthashtable pages) {
        stringbuffer s = new stringbuffer();
        pdfobject obj = dest.getpdfobject(0);
        if (obj.isnumber()) {
            s.append(((pdfnumber) obj).intvalue() + 1);
        } else {
            s.append(pages.get(getnumber((pdfindirectreference) obj)));
        }
        s.append(' ').append(dest.getpdfobject(1).tostring().substring(1));
        for (int k = 2; k < dest.size(); ++k) {
            s.append(' ').append(dest.getpdfobject(k).tostring());
        }
        return s.tostring();
    }

    private int getnumber(pdfindirectreference indirect) {
        pdfdictionary pdfobj = (pdfdictionary) pdfreader
                .getpdfobjectrelease(indirect);
        if (pdfobj.contains(pdfname.type)
                && pdfobj.get(pdfname.type).equals(pdfname.pages)
                && pdfobj.contains(pdfname.kids)) {
            pdfarray kids = (pdfarray) pdfobj.get(pdfname.kids);
            indirect = (pdfindirectreference) kids.getpdfobject(0);
        }
        return indirect.getnumber();
    }
}