简单粗暴地翻译英文pdf

背景:看书的时候经常遇到英文pdf,没有合适的翻译软件可以快速翻译全书。这里提供一个解决方案。

Step 1

  • 打开英文pdf
  • CTRL+A全选文字
  • CTRL+C复制
  • 打开记事本
  • CTRL+V粘贴
  • 保存为data.txt

Step 2

写一个C++脚本

// ToolPdf2Html.cpp : 此文件包含 "main" 函数。程序执行将在此处开始并结束。
//

#include <iostream>
#include <cstring>
#include <vector>
#include <unordered_map>
#include <fstream>
#include <iostream>
#include <sstream>
#include <windows.h>

//pdf->txt->html (edge translate)
// Reads the whole file `filename` into `data` as raw bytes.
//
// filename: path of the file to read; it is opened in binary mode.
// data:     receives the file contents. If the file cannot be opened, `data`
//           is left untouched. If the file is empty or its size cannot be
//           determined, `data` is cleared.
//
// The bytes are fetched with a single read() call rather than by comparing
// get() against EOF through a `char`: that comparison mistakes a 0xFF byte for
// end-of-file when `char` is signed and never detects end-of-file when `char`
// is unsigned.
static void ReadFileContentsByte(std::string filename, std::vector<char>& data)
{
    std::ifstream fin(filename, std::ios::in | std::ios::binary);
    if (!fin.is_open())
    {
        return;
    }

    // Determine the file size by seeking to the end.
    fin.seekg(0, std::ios::end);
    const std::streamoff size = fin.tellg();
    fin.seekg(0, std::ios::beg);

    // tellg() reports -1 on failure; treat that like an empty file.
    if (size <= 0)
    {
        data.clear();
        return;
    }

    data.assign(static_cast<std::size_t>(size), 0);
    fin.read(data.data(), static_cast<std::streamsize>(size));

    // Keep only the bytes that were actually read.
    data.resize(static_cast<std::size_t>(fin.gcount()));
}

// Reads `filename` line by line and appends each line to `data`.
//
// filename: path of the text file to read.
// data:     receives one entry per line, appended after any existing entries.
//           If the file cannot be opened, `data` is left untouched.
//
// The file is opened in binary mode, so no newline translation happens on
// read. For files with CRLF line endings std::getline would therefore leave a
// trailing '\r' on every line; it is removed here so callers get clean text.
static void ReadFileContentsLines(std::string filename, std::vector<std::string>& data)
{
    std::ifstream fin(filename, std::ios::in | std::ios::binary);
    if (!fin.is_open())
    {
        return;
    }

    std::string line;
    while (std::getline(fin, line))
    {
        // Drop the carriage return left over from a CRLF line ending.
        if (!line.empty() && line.back() == '\r')
        {
            line.pop_back();
        }
        data.push_back(line);
    }
}

// Writes `data` to the file `filename`, replacing any previous contents.
//
// filename: path of the file to create or overwrite (opened in text mode).
// data:     the text to write.
//
// If the file cannot be opened, a message is printed to standard output and
// nothing is written.
static void WriteFileContentsByte(std::string filename, std::string& data)
{
    std::ofstream sink(filename, std::ios::out);
    if (sink.is_open())
    {
        sink << data;
        sink.close();
        return;
    }

    std::cout << "no open file " << filename << std::endl;
}

// Converts an integer to its decimal text form by formatting it through a
// string stream.
//
// value: the number to convert.
// Returns the formatted text, e.g. "42" for 42 and "-7" for -7.
static std::string Number2Stri(int value)
{
    std::ostringstream formatter;
    formatter << value;
    return formatter.str();
}


// Replaces the characters that are special in HTML text content ('&', '<',
// '>') with entity references, so input such as "#include <iostream>" is
// displayed literally instead of being parsed as markup.
static std::string EscapeHtml(const std::string& text)
{
    std::string escaped;
    escaped.reserve(text.size());
    for (const char ch : text) {
        switch (ch) {
        case '&': escaped += "&amp;"; break;
        case '<': escaped += "&lt;"; break;
        case '>': escaped += "&gt;"; break;
        default:  escaped += ch; break;
        }
    }
    return escaped;
}

// Splits a text file into a series of small HTML pages.
//
// argv[1] (optional) is the input text file; it defaults to "data.txt".
// Every `singleLen` input lines become one page, written to
// output/index<N>.html with N starting at 1. Each page ends with its page
// number and links to the neighbouring pages. The final, possibly shorter,
// page is written too, so no input lines are dropped.
//
// Returns 0 on success, 1 if no input lines could be read or the output
// directory could not be created.
int main(int argc, char *argv[])
{
    std::cout << "Hello World!\n";
    std::string filepath = "data.txt";
    if (argc > 1) {
        filepath = argv[1];
    }
    std::cout << "filepath=" << filepath.c_str() << std::endl;

    std::vector<std::string> data;
    ReadFileContentsLines(filepath, data);
    std::cout << "data.size=" << data.size() << std::endl;
    if (data.empty()) {
        // Missing, unreadable, or empty input: there is nothing to convert.
        std::cout << "no lines read from " << filepath.c_str() << std::endl;
        return 1;
    }

    const std::string htmlHead =
        "<!DOCTYPE html>\n"
        "<html>\n"
        "<head>\n"
        "<meta charset = \"utf-8\">\n"
        "<title>The C++ Programming Language</title>\n"
        "</head>\n"
        "<body>\n";

    const std::string htmlEnd =
        "</body> </html>";

    // Make sure the "output" directory exists before writing any page.
    WIN32_FIND_DATA findData;
    HANDLE hFind = FindFirstFile(L"output\\*", &findData);
    if (hFind == INVALID_HANDLE_VALUE) {
        std::cout << "文件夹不存在" << std::endl;
        if (CreateDirectory(L"output", NULL)) {
            std::cout << "文件夹创建成功" << std::endl;
        }
        else {
            std::cout << "文件夹创建失败,错误代码:" << GetLastError() << std::endl;
            // Without the directory every page write would fail.
            return 1;
        }
    }
    else {
        FindClose(hFind);
        std::cout << "文件夹存在" << std::endl;
    }

    // Number of input lines placed on each HTML page.
    const int singleLen = 50;
    const int len = static_cast<int>(data.size());
    // Round up so that a trailing partial page is counted as well.
    const int pageCount = (len + singleLen - 1) / singleLen;

    for (int page = 1; page <= pageCount; ++page) {
        const int first = (page - 1) * singleLen;
        const int last = (first + singleLen < len) ? first + singleLen : len;

        std::string output = htmlHead;
        for (int i = first; i < last; ++i) {
            output += EscapeHtml(data[i]);
            output += "<br/>";
        }

        // Navigation footer: [Previous] <page number> [Next].
        if (page > 1) {
            output += "<br/><br/><a href=\"index" + Number2Stri(page - 1) + ".html\">Previous</a> ";
        }
        output += Number2Stri(page);
        if (page < pageCount) {
            output += " <a href=\"index" + Number2Stri(page + 1) + ".html\">Next</a>";
        }
        output += htmlEnd;

        const std::string filep = "output/index" + Number2Stri(page) + ".html";
        std::cout << "write to " << filep.c_str() << std::endl;
        WriteFileContentsByte(filep, output);
    }

    std::cout << "finish." << std::endl;
    return 0;
}


  • 脚本会读入data.txt
  • 按行处理,每50行生成一个html
  • 命令行运行脚本
ToolPdf2Html.exe data.txt
  • 在output文件夹下生成一堆html

Step 3

用微软的Edge浏览器打开html,浏览器自动翻译英文

在这里插入图片描述
通过页面底部的 Previous(上一页)/ Next(下一页)链接翻页

相关推荐

  1. 【教程】英文字幕批量翻译

    2024-05-14 13:10:07       34 阅读
  2. 一些用 GPT 翻译计算机科学/人工智能 PDF 讲义

    2024-05-14 13:10:07       52 阅读
  3. 【chatgpt】学术翻译英文润色prompt

    2024-05-14 13:10:07       34 阅读

最近更新

  1. docker php8.1+nginx base 镜像 dockerfile 配置

    2024-05-14 13:10:07       94 阅读
  2. Could not load dynamic library ‘cudart64_100.dll‘

    2024-05-14 13:10:07       100 阅读
  3. 在Django里面运行非项目文件

    2024-05-14 13:10:07       82 阅读
  4. Python语言-面向对象

    2024-05-14 13:10:07       91 阅读

热门阅读

  1. Vue setup函数

    2024-05-14 13:10:07       32 阅读
  2. Kotlin标准函数和静态方法

    2024-05-14 13:10:07       31 阅读
  3. iOS 键盘相关

    2024-05-14 13:10:07       28 阅读
  4. 实用的Chrome命令

    2024-05-14 13:10:07       26 阅读
  5. iOS 更改button文字和图片的位置

    2024-05-14 13:10:07       27 阅读
  6. Linux下安装netcore

    2024-05-14 13:10:07       31 阅读
  7. Linux sigfillset

    2024-05-14 13:10:07       27 阅读
  8. vue3+vite项目部署服务器,选择非根目录访问

    2024-05-14 13:10:07       28 阅读
  9. ubuntu升级git

    2024-05-14 13:10:07       35 阅读
  10. git error: cannot lock ref ‘xxx‘:解决

    2024-05-14 13:10:07       33 阅读
  11. GitLab CI/CD的原理及应用详解(二)

    2024-05-14 13:10:07       32 阅读
  12. golang中switch-case及select-cas

    2024-05-14 13:10:07       34 阅读
  13. Mysql中校对集utf8_unicode_ci与utf8_general_ci的区别

    2024-05-14 13:10:07       30 阅读