simpledict详细分析| 青训营笔记

137 阅读9分钟

golang 在线字典实现详细分析

这是我参与「第三届青训营 -后端场」笔记创作活动的第 2 篇笔记

前言

这是字节青训营使用 golang 实现在线字典的详细分析

尽可能将视频里面的步骤复刻出来

项目需求

通过 golang 调用在线翻译网址的 api 接口,在本地实现翻译


V1 版本

我们使用的在线翻译网站是:彩云小译 - 在线翻译 (caiyunapp.com)

我们要在这个网站里面寻找接口,这时我们需要 F12

  1. F12
  2. 随便翻译一个词
  3. 找到“dict”请求,注意:一定要是 POST 请求!

image.png

从请求报文中,我们就找到了翻译的接口了:

  • https://api.interpreter.caiyunai.com/v1/dict

同时,我们还需要获取它请求的 json

  • {trans_type: "en2zh", source: "word"}

此时可以开始我们的编码:

package main

import (
	"fmt"
	"io/ioutil"
	"log"
	"net/http"
	"strings"
)

// main builds the POST request for the Caiyun dictionary endpoint.
// This snippet stops right after constructing the request: client and req
// are not used yet, so it only compiles once the later steps are added.
func main() {
	client := &http.Client{} // create the HTTP client
	var data = strings.NewReader(`{"trans_type":"en2zh","source":"good"}`)
	// Build a POST request to the dictionary endpoint with the JSON payload as its body.
	req, err := http.NewRequest("POST", "https://api.interpreter.caiyunai.com/v1/dict", data)
	if err != nil {
		log.Fatal(err)
	}
}

声明了 req 请求后还不行,我们还需要设置请求报文的信息

学过计网的都知道,报文头里面的东西很多,手写的话太麻烦了,而且很浪费时间

这里字节讲师给我们提供了一个根据 curl 请求自动生成代码的网站

我们再次回到之前的 dict POST 请求:右键,找到复制

image.png

  • 注意:是复制为 cURL(bash) 不是 cmd!!!

然后将其复制到那个网站上:我们就能得到代码了:

image.png

其实这就是 V1 的代码实现。。。。

package main

import (
	"fmt"
	"io/ioutil"
	"log"
	"net/http"
	"strings"
)

// main posts a fixed lookup for the word "good" to the Caiyun dictionary
// endpoint and prints the raw response body to standard output.
func main() {
	const endpoint = "https://api.interpreter.caiyunai.com/v1/dict"

	// Request headers, applied in the order listed.
	headers := [][2]string{
		{"Connection", "keep-alive"},
		{"DNT", "1"},
		{"os-version", ""},
		{"sec-ch-ua-mobile", "?0"},
		{"User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36"},
		{"app-name", "xy"},
		{"Content-Type", "application/json;charset=UTF-8"},
		{"Accept", "application/json, text/plain, */*"},
		{"device-id", ""},
		{"os-type", "web"},
		{"X-Authorization", "token:qgemv4jr1y38jyq6vhvi"},
		{"Origin", "https://fanyi.caiyunapp.com"},
		{"Sec-Fetch-Site", "cross-site"},
		{"Sec-Fetch-Mode", "cors"},
		{"Sec-Fetch-Dest", "empty"},
		{"Referer", "https://fanyi.caiyunapp.com/"},
		{"Accept-Language", "zh-CN,zh;q=0.9"},
		{"Cookie", "_ym_uid=16456948721020430059; _ym_d=1645694872"},
	}

	// Build the POST request around the JSON payload.
	payload := strings.NewReader(`{"trans_type":"en2zh","source":"good"}`)
	req, err := http.NewRequest("POST", endpoint, payload)
	if err != nil {
		log.Fatal(err)
	}
	for _, kv := range headers {
		req.Header.Set(kv[0], kv[1])
	}

	// Send it.
	client := &http.Client{}
	resp, err := client.Do(req)
	if err != nil {
		log.Fatal(err)
	}
	// Register the close right after the error check so the body is released
	// when main returns.
	defer resp.Body.Close()

	raw, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		log.Fatal(err)
	}

	// Print the body exactly as received.
	fmt.Printf("%s\n", raw)
}


V2

一般的 web 应用都是用 json 进行交互的,所以我们在 golang 程序中就应该将参数用 json 的形式封装后再进行传递

// DictRequest is the JSON body sent to the dictionary endpoint.
type DictRequest struct {
	// TransType is the translation direction, e.g. "en2zh".
	TransType string `json:"trans_type"`
	// Source is the text to look up.
	Source    string `json:"source"`
	// UserID is not set by the callers shown in this article.
	UserID    string `json:"user_id"`
}

然后我们再将其发送:

// main marshals a DictRequest to JSON and builds the POST request from it.
func main() {
	client := &http.Client{}
	request := DictRequest{TransType: "en2zh", Source: "good"}
	buf, err := json.Marshal(request)
	if err != nil {
		log.Fatal(err)
	}

	var data = bytes.NewReader(buf) // wrap the marshaled JSON bytes in a reader to use as the request body
	req, err := http.NewRequest("POST", "https://api.interpreter.caiyunai.com/v1/dict", data)
	if err != nil {
		log.Fatal(err)
	}

.......

后面的代码和 v1 没有不同


V3

既然请求,那肯定有响应。

V3 对比 V2 的不同在于多了对响应回来的数据处理

自动生成结构体

我们再次回到刚刚的在线翻译网站,看一下 F12 下响应的格式:

image.png

{"rc":0,"wiki":{"known_in_laguages":124,"description":{"source":"smallest linguistic element that may be uttered in isolation with semantic or pragmatic content","target":null},"id":"Q8171","item":{"source":"word","target":"\u5355\u8bcd"},"image_url":"http:\/\/www.caiyunapp.com\/imgs\/link_default_img.png","is_subject":"true","sitelink":"https:\/\/www.caiyunapp.com\/read_mode\/?id=61b88462df8400468e55c2d2"},"dictionary":{"prons":{"en-us":"[w\u025dd]","en":"[w\u0259\u02d0d]"},"explanations":["n.\u8bcd;\u8bdd\u8bed;\u6d88\u606f;\u547d\u4ee4;(pl.)\u53e3\u89d2,\u4e89\u8bba;\u53e3\u4ee4","vt.\u7528\u8bdd\u8868\u793a;\u63aa\u8f9e"],"synonym":["phrase","say","express","voice","tell"],"antonym":[],"wqx_example":[["take my word","\u76f8\u4fe1\u6211\u7684\u8bdd,\u6211\u6562\u62c5\u4fdd"],["say the word","\u8bf4\u8bdd,\u8bf4\u4e00\u58f0,\u53d1\u5e03\u547d\u4ee4"],["put something into words","\u7528\u8bed\u8a00\u8868\u8fbe\u67d0\u4e8b"],["last word","\u9057\u5631;\u51b3\u5b9a\u6027\u7684\u610f\u89c1,\u5b9a\u8bba,\u6700\u540e\u51b3\u5b9a\u6743"],["in somebody's words","\u7528\u67d0\u4eba\u7684\u8bdd\u6765\u8bf4"],["in other words","\u6362\u53e5\u8bdd\u8bf4,\u4e5f\u5c31\u662f\u8bf4"],["in a word","\u603b\u800c\u8a00\u4e4b"],["have words with","\u548c\u67d0\u4eba\u5435\u67b6"],["go back on one's word","\u98df\u8a00,\u8fdd\u80cc\u8bfa\u8a00"],["give somebody one's word","\u5411\u67d0\u4eba\u4fdd\u8bc1"],["get the word","\u7406\u89e3\u610f\u601d"],["get in a word","\u63d2\u8bdd"],["at a word","\u8fc5\u901f\u5e94\u7b54,\u7acb\u5373"],["as good as one's word","\u4fe1\u5b88\u8bfa\u8a00,\u503c\u5f97\u4fe1\u8d56"],["a long word","\u5f88\u957f\u7684\u65f6\u95f4"],["Don't get in a word while the teacher is talking . ","\u8001\u5e08\u8bf4\u8bdd\u65f6\u4e0d\u8981\u63d2\u5634\u3002"]],"entry":"word","type":"word","related":[],"source":"wenquxing"}}
  • 长得离谱。。。

如果我们手动去构造结构体也是一件非常难受的事情,所以这里又再次提供了自动生成结构体的网站:

将我们得到的 json 复制过去,选择“转换-嵌套”即可:

image.png

  • 又快又准确!

我们把名字改一下:DictResponse

// DictResponse mirrors the JSON document returned by the dictionary endpoint.
// The layout was generated from a sample response, so the fields follow the
// API's own keys.
type DictResponse struct {
	Rc   int `json:"rc"`
	// Wiki carries the "wiki" object of the response.
	Wiki struct {
		// The key is spelled "known_in_laguages" in the API response itself.
		KnownInLaguages int `json:"known_in_laguages"`
		Description     struct {
			Source string      `json:"source"`
			// Target is null in the sample response, so no concrete type is known.
			Target interface{} `json:"target"`
		} `json:"description"`
		ID   string `json:"id"`
		Item struct {
			Source string `json:"source"`
			Target string `json:"target"`
		} `json:"item"`
		ImageURL  string `json:"image_url"`
		// IsSubject arrives as a quoted string ("true") in the sample response.
		IsSubject string `json:"is_subject"`
		Sitelink  string `json:"sitelink"`
	} `json:"wiki"`
	// Dictionary carries the "dictionary" object of the response.
	Dictionary struct {
		// Prons holds the pronunciations under the keys "en-us" and "en".
		Prons struct {
			EnUs string `json:"en-us"`
			En   string `json:"en"`
		} `json:"prons"`
		Explanations []string      `json:"explanations"`
		Synonym      []string      `json:"synonym"`
		Antonym      []string      `json:"antonym"`
		// In the sample response each inner slice is a [phrase, translation] pair.
		WqxExample   [][]string    `json:"wqx_example"`
		Entry        string        `json:"entry"`
		Type         string        `json:"type"`
		// Related is an empty array in the sample response; element type unknown.
		Related      []interface{} `json:"related"`
		Source       string        `json:"source"`
	} `json:"dictionary"`
}

获取返回的响应

设置请求头的操作与 V2 一样

...........................

// Send the request and obtain the response.
	resp, err := client.Do(req)
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	// Read the whole response body with ioutil; the endpoint replies with JSON text.
	bodyText, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		log.Fatal(err)
	}

	// Decode the JSON body into the DictResponse struct.
	var dictResponse DictResponse
	err = json.Unmarshal(bodyText, &dictResponse)
	if err != nil {
		log.Fatal(err)
	}
	// Print the decoded struct in Go-syntax form.
	fmt.Printf("%#v\n", dictResponse)
}

V4

V4 是最终的版本。

将我们上面的操作封装成一个 query 函数

同时作出一点小改进:

  • 在获取响应报文后,我们需要检查报文里面的响应码,查看是不是 200 成功码
  • 对于报文的处理,都要首先检查一下响应码是否正常。正常后,才能执行后续的操作
.......
	// Check the HTTP status code before decoding the body.
	// NOTE(review): log.Fatal formats like fmt.Print, which adds no space next
	// to string operands, so this prints "bad StatusCode:<code>body<text>".
	if resp.StatusCode != 200 {
		log.Fatal("bad StatusCode:", resp.StatusCode, "body", string(bodyText))
	}

	// Decode the JSON body.
	var dictResponse DictResponse
	err = json.Unmarshal(bodyText, &dictResponse)
	if err != nil {
		log.Fatal(err)
	}

	// Print the result.
	/*
		Most of the data in the JSON is not needed; only the translation
		result is printed.
	*/
	fmt.Println(word, "UK:", dictResponse.Dictionary.Prons.En, "US:", dictResponse.Dictionary.Prons.EnUs)
	for _, item := range dictResponse.Dictionary.Explanations {
		fmt.Println(item)
	}

.....

完整代码

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io/ioutil"
	"log"
	"net/http"
	"os"
)

// DictRequest is the JSON body sent to the dictionary endpoint.
type DictRequest struct {
	// TransType is the translation direction, e.g. "en2zh".
	TransType string `json:"trans_type"`
	// Source is the text to look up.
	Source    string `json:"source"`
	// UserID is not set by the callers shown in this article.
	UserID    string `json:"user_id"`
}

// DictResponse mirrors the JSON document returned by the dictionary endpoint.
// The layout was generated from a sample response, so the fields follow the
// API's own keys.
type DictResponse struct {
	Rc   int `json:"rc"`
	// Wiki carries the "wiki" object of the response.
	Wiki struct {
		// The key is spelled "known_in_laguages" in the API response itself.
		KnownInLaguages int `json:"known_in_laguages"`
		Description     struct {
			Source string      `json:"source"`
			// Target is null in the sample response, so no concrete type is known.
			Target interface{} `json:"target"`
		} `json:"description"`
		ID   string `json:"id"`
		Item struct {
			Source string `json:"source"`
			Target string `json:"target"`
		} `json:"item"`
		ImageURL  string `json:"image_url"`
		// IsSubject arrives as a quoted string ("true") in the sample response.
		IsSubject string `json:"is_subject"`
		Sitelink  string `json:"sitelink"`
	} `json:"wiki"`
	// Dictionary carries the "dictionary" object of the response.
	Dictionary struct {
		// Prons holds the pronunciations under the keys "en-us" and "en".
		Prons struct {
			EnUs string `json:"en-us"`
			En   string `json:"en"`
		} `json:"prons"`
		Explanations []string      `json:"explanations"`
		Synonym      []string      `json:"synonym"`
		Antonym      []string      `json:"antonym"`
		// In the sample response each inner slice is a [phrase, translation] pair.
		WqxExample   [][]string    `json:"wqx_example"`
		Entry        string        `json:"entry"`
		Type         string        `json:"type"`
		// Related is an empty array in the sample response; element type unknown.
		Related      []interface{} `json:"related"`
		Source       string        `json:"source"`
	} `json:"dictionary"`
}

// query looks up word in the Caiyun dictionary (English to Chinese) and
// prints its UK and US pronunciations followed by one explanation per line.
//
// Any failure (building or sending the request, reading the body, a non-200
// status, or a body that cannot be decoded) is reported through
// log.Fatal/log.Fatalf, which terminates the process.
func query(word string) {
	client := &http.Client{}
	request := DictRequest{TransType: "en2zh", Source: word}
	buf, err := json.Marshal(request)
	if err != nil {
		log.Fatal(err)
	}

	var data = bytes.NewReader(buf)
	req, err := http.NewRequest("POST", "https://api.interpreter.caiyunai.com/v1/dict", data)
	if err != nil {
		log.Fatal(err)
	}

	// Request headers sent along with the JSON body.
	req.Header.Set("Connection", "keep-alive")
	req.Header.Set("DNT", "1")
	req.Header.Set("os-version", "")
	req.Header.Set("sec-ch-ua-mobile", "?0")
	req.Header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36")
	req.Header.Set("app-name", "xy")
	req.Header.Set("Content-Type", "application/json;charset=UTF-8")
	req.Header.Set("Accept", "application/json, text/plain, */*")
	req.Header.Set("device-id", "")
	req.Header.Set("os-type", "web")
	req.Header.Set("X-Authorization", "token:qgemv4jr1y38jyq6vhvi")
	req.Header.Set("Origin", "https://fanyi.caiyunapp.com")
	req.Header.Set("Sec-Fetch-Site", "cross-site")
	req.Header.Set("Sec-Fetch-Mode", "cors")
	req.Header.Set("Sec-Fetch-Dest", "empty")
	req.Header.Set("Referer", "https://fanyi.caiyunapp.com/")
	req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9")
	req.Header.Set("Cookie", "_ym_uid=16456948721020430059; _ym_d=1645694872")

	resp, err := client.Do(req)
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	// The body is read before the status check so it can be included in the
	// error message for a non-200 reply.
	bodyText, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		log.Fatal(err)
	}

	// Use an explicit format: log.Fatal follows fmt.Print spacing rules and
	// would run the status code and body together with the surrounding text.
	if resp.StatusCode != http.StatusOK {
		log.Fatalf("bad StatusCode: %d body: %s", resp.StatusCode, bodyText)
	}

	// Decode the JSON body.
	var dictResponse DictResponse
	err = json.Unmarshal(bodyText, &dictResponse)
	if err != nil {
		log.Fatal(err)
	}

	// Only the pronunciations and explanations are printed; the rest of the
	// response is ignored.
	fmt.Println(word, "UK:", dictResponse.Dictionary.Prons.En, "US:", dictResponse.Dictionary.Prons.EnUs)
	for _, item := range dictResponse.Dictionary.Explanations {
		fmt.Println(item)
	}
}

// main expects exactly one command-line argument, the word to look up, and
// passes it to query. With any other argument count it writes the usage text
// to standard error and exits with status 1.
func main() {
	args := os.Args
	if len(args) == 2 {
		query(args[1])
		return
	}

	fmt.Fprintf(os.Stderr, `usage: simpleDict WORD
example: simpleDict hello
		`)
	os.Exit(1)
}

原版的代码中:word := os.Args[1] 需要注意一下:

  • 这里如果直接在 IDE(如 GoLand)里运行而不带命令行参数的话,程序只会打印 usage 提示并以状态码 1 退出,不会进行翻译

  • 需要在命令行中进行输入,注意要跳转到指定的目录下执行

    • PS D:\Users\quan\Desktop\字节跳动青训营\go-by-example-master\simpledict\v4> go run main.go hello
      
    • go run main.go hello
      

结果:

hello UK: [ˈheˈləu] US: [həˈlo]
int.喂;哈罗   
n.引人注意的呼声
v.向人呼(喂)  

V5 版本

这是课后作业,需要再增加一个翻译引擎的支持,这里我们选择 火山翻译 - 让翻译更简单 (volcengine.com)

还是按照上面分析的套路进行

  • 获取接口 url
  • 获取请求结构体
  • 发送请求
  • 获取响应
  • 解析响应并输出

自动生成结构体

// AutoGenerated is the JSON request body for the Volcengine translate
// endpoint; its fields map one-to-one onto the keys of that payload.
type AutoGenerated struct {
	SourceLanguage string   `json:"source_language"`
	TargetLanguage string   `json:"target_language"`
	Text           string   `json:"text"`
	HomeLanguage   string   `json:"home_language"`
	Category       string   `json:"category"`
	GlossaryList   []string `json:"glossary_list"`
}

curl 自动生成代码

package main

import (
	"fmt"
	"io/ioutil"
	"log"
	"net/http"
	"strings"
)

// main posts a fixed translate request for the text "word" to the Volcengine
// web endpoint and prints the raw response body to standard output.
func main() {
	const endpoint = "https://translate.volcengine.com/web/translate/v1/?msToken=&X-Bogus=DFSzswVLQDat0rhSSWD5-KXAIQRv&_signature=_02B4Z6wo00001hzNOBgAAIDDf8f4clQIRzIczTyAAOVVfnGTDzcIWCIHXTST30Xttv40K85l9qNkRPhUaVJgMZtnGjYvAbvN9AV.TufrgZWAsXD.jQIflBFTr4D68uZ.v8TPtWCM2aFmOpbLb2"

	// Request headers, applied in the order listed.
	headers := [][2]string{
		{"authority", "translate.volcengine.com"},
		{"accept", "application/json, text/plain, */*"},
		{"accept-language", "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6"},
		{"content-type", "application/json"},
		{"cookie", "x-jupiter-uuid=16518261326983940; i18next=zh-CN; ttcid=2e9529736ecc4d988fea0f390115f4b328; tt_scid=-uEZ033psgWqZR921uXZdgOqXNqeDGoRjfcKFvCaBZHprsoDRYFom2OuLPtR9ZvQbd80; s_v_web_id=verify_099b806af26b4a3e3b8ac5dcca239f49; _tea_utm_cache_2018=undefined"},
		{"origin", "https://translate.volcengine.com"},
		{"referer", "https://translate.volcengine.com/translate?category=&home_language=zh&source_language=detect&target_language=en&text="},
		{"sec-ch-ua", `" Not A;Brand";v="99", "Chromium";v="101", "Microsoft Edge";v="101"`},
		{"sec-ch-ua-mobile", "?0"},
		{"sec-ch-ua-platform", `"Windows"`},
		{"sec-fetch-dest", "empty"},
		{"sec-fetch-mode", "cors"},
		{"sec-fetch-site", "same-origin"},
		{"user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.41 Safari/537.36 Edg/101.0.1210.32"},
	}

	// Build the POST request around the JSON payload.
	payload := strings.NewReader(`{"source_language":"detect","target_language":"en","text":"word","home_language":"zh","category":"","glossary_list":["ailab/menu"]}`)
	req, err := http.NewRequest("POST", endpoint, payload)
	if err != nil {
		log.Fatal(err)
	}
	for _, kv := range headers {
		req.Header.Set(kv[0], kv[1])
	}

	client := &http.Client{}
	resp, err := client.Do(req)
	if err != nil {
		log.Fatal(err)
	}
	// Release the response body when main returns.
	defer resp.Body.Close()

	raw, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		log.Fatal(err)
	}

	// Print the body exactly as received.
	fmt.Printf("%s\n", raw)
}

对 V4 做相关的修改

// query looks up word with the engine selected by num: 1 calls volcen,
// 2 calls caiyun, and any other value does nothing.
func query(num int, word string) {
	if num == 1 {
		volcen(word)
		return
	}
	if num == 2 {
		caiyun(word)
	}
}

// multiQuery looks up word with both engines concurrently and returns once
// both lookups have finished.
//
// The goroutines are tracked with a WaitGroup. Without the wait, the caller
// (and the whole program, when the caller is main) could return before either
// lookup had run to completion.
func multiQuery(word string) {
	var pending sync.WaitGroup
	pending.Add(2)

	go func() {
		defer pending.Done()
		volcen(word)
	}()
	go func() {
		defer pending.Done()
		caiyun(word)
	}()

	// Block until both lookups are done.
	pending.Wait()
}

// wg is a package-level WaitGroup; nothing in this snippet references it.
var wg sync.WaitGroup

// main looks up a fixed word through query with engine number 1.
func main() {
	const word = "hello"
	query(1, word)
	// multiQuery(word)
}