ChatGLMForSequenceClassification
class ChatGLMForSequenceClassification(ChatGLMPreTrainedModel):
def __init__(self, config: ChatGLMConfig, empty_init=True, device=None):
super().__init__(config)
self.num_labels = config.num_labels
self.transformer = ChatGLMModel(config, empty_init=empty_init, device=device)
self.classifier_head = nn.Linear(config.hidden_size, config.num_labels, bias=True, dtype=torch.half)
if config.classifier_dropout is not None:
self.dropout = nn.Dropout(config.classifier_dropout)
else:
self.dropout = None
self.config = config
if self.config.quantization_bit:
self.quantize(self.config.quantization_bit, empty_init=True)
def forward(
self,
input_ids: Optional[torch.LongTensor] = None,
position_ids: Optional[torch.LongTensor] = None,
attention_mask: Optional[torch.Tensor] = None,
full_attention_mask: Optional[torch.Tensor] = None,
past_key_values: Optional[Tuple[Tuple[torch.Tensor, torch.Tensor], ...]] = None,
inputs_embeds: Optional[torch.LongTensor] = None,
labels: Optional[torch.LongTensor] = None,
use_cache: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
) -> Union[Tuple[torch.Tensor, ...], SequenceClassifierOutputWithPast]:
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
transformer_outputs = self.transformer(
input_ids=input_ids,
position_ids=position_ids,
attention_mask=attention_mask,
full_attention_mask=full_attention_mask,
past_key_values=past_key_values,
inputs_embeds=inputs_embeds,
use_cache=use_cache,
output_hidden_states=output_hidden_states,
return_dict=return_dict,
)
hidden_states = transformer_outputs[0]
pooled_hidden_states = hidden_states[-1]
if self.dropout is not None:
pooled_hidden_states = self.dropout(pooled_hidden_states)
logits = self.classifier_head(pooled_hidden_states)
loss = None
if labels is not None:
if self.config.problem_type is None:
if self.num_labels == 1:
self.config.problem_type = "regression"
elif self.num_labels > 1 and (labels.dtype == torch.long or labels.dtype == torch.int):
self.config.problem_type = "single_label_classification"
else:
self.config.problem_type = "multi_label_classification"
if self.config.problem_type == "regression":
loss_fct = MSELoss()
if self.num_labels == 1:
loss = loss_fct(logits.squeeze().float(), labels.squeeze())
else:
loss = loss_fct(logits.float(), labels)
elif self.config.problem_type == "single_label_classification":
loss_fct = CrossEntropyLoss()
loss = loss_fct(logits.view(-1, self.num_labels).float(), labels.view(-1))
elif self.config.problem_type == "multi_label_classification":
loss_fct = BCEWithLogitsLoss()
loss = loss_fct(logits.float(), labels.view(-1, self.num_labels))
if not return_dict:
output = (logits,) + transformer_outputs[1:]
return ((loss,) + output) if loss is not None else output
return SequenceClassifierOutputWithPast(
loss=loss,
logits=logits,
past_key_values=transformer_outputs.past_key_values,
hidden_states=transformer_outputs.hidden_states,
attentions=transformer_outputs.attentions,
)
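The pooling above relies on ChatGLMModel returning hidden states laid out as [seq_len, batch, hidden], so hidden_states[-1] picks the hidden state of the final token of every sequence, which is then (optionally) dropped out and projected to num_labels logits by classifier_head. A minimal sketch of that step on dummy tensors (shapes and float32 weights are assumptions for illustration; the real head is created in torch.half):

import torch
import torch.nn as nn

# Dummy shapes; the [seq_len, batch, hidden] layout is assumed from the
# hidden_states[-1] indexing in forward() above.
seq_len, batch, hidden_size, num_labels = 8, 2, 16, 3
hidden_states = torch.randn(seq_len, batch, hidden_size)

classifier_head = nn.Linear(hidden_size, num_labels)
pooled = hidden_states[-1]          # [batch, hidden_size]: last token of each sequence
logits = classifier_head(pooled)    # [batch, num_labels]
print(logits.shape)                 # torch.Size([2, 3])

When labels are supplied, the loss branch then chooses MSELoss, CrossEntropyLoss, or BCEWithLogitsLoss according to config.problem_type, inferring it from num_labels and the label dtype when it is unset.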
ChatGLMForConditionalGeneration.chat()
In [1]: q = '你好'
In [2]: r, his = model.chat(tok, q)
In [3]: r
Out[3]: '\n 你好!很高兴见到你。有什么问题我可以帮助你解答吗?'
In [4]: his
Out[4]:
[{'role': 'user', 'content': '你好'},
{'role': 'assistant', 'metadata': '', 'content': '你好!很高兴见到你。有什么问题我可以帮助你解答吗?'}]
In [5]: q = '你可以做什么?'
In [6]: r, his = model.chat(tok, q, history=his)
In [7]: r
Out[7]: '\n 作为人工智能助手,我可以帮助您解答各种问题。以下是一些我擅长的领域:\n\n1. 日常生活建议:如购物建议、健康建议、旅行建议等。\n2. 学习辅导:如数学、科学、历史等学科问题。\n3. 语言学习:如中文、英文、日语等语言学习。\n4. 娱乐休闲:如音乐、电影、书籍、游戏等推荐。\n5. 技术支持:如操作系统、软件应用、电子设备等使用问题。\n\n当然,我会不断学习和进步,随着时间的推移,我将能帮助您解答更多领域的疑问。如果您有任何问题,请随时向我提问。'
In [8]: his
Out[8]:
[{'role': 'user', 'content': '你好'},
{'role': 'assistant', 'metadata': '', 'content': '你好!很高兴见到你。有什么问题我可以帮助你解答吗?'},
{'role': 'user', 'content': '你可以做什么?'},
{'role': 'assistant',
'metadata': '',
'content': '作为人工智能助手,我可以帮助您解答各种问题。以下是一些我擅长的领域:\n\n1. 日常生活建议:如购物建议、健康建议、旅行建议等。\n2. 学习辅导:如数学、科学、历史等学科问题。\n3. 语言学习:如中文、英文、日语等语言学习。\n4. 娱乐休闲:如音乐、电影、书籍、游戏等推荐。\n5. 技术支持:如操作系统、软件应用、电子设备等使用问题。\n\n当然,我会不断学习和进步,随着时间的推移,我将能帮助您解答更多领域的疑问。如果您有任何问题,请随时向我提问。'}]
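The session above can be reproduced with the usual remote-code loading path; the checkpoint name and the half()/cuda() placement below are assumptions, not stated in the transcript:

from transformers import AutoModel, AutoTokenizer

# Assumed checkpoint; any ChatGLM3 checkpoint that ships this modeling file works the same way.
tok = AutoTokenizer.from_pretrained("THUDM/chatglm3-6b", trust_remote_code=True)
model = AutoModel.from_pretrained("THUDM/chatglm3-6b", trust_remote_code=True).half().cuda().eval()

r, his = model.chat(tok, '你好')                          # first turn, empty history
r, his = model.chat(tok, '你可以做什么?', history=his)    # follow-up turn, reuse history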
@torch.inference_mode()
def chat(self, tokenizer, query: str, history: List[Tuple[str, str]] = None, role: str = "user",
max_length: int = 32768, num_beams=1, do_sample=True, top_p=0.8, temperature=0.8, logits_processor=None,
**kwargs):
if history is None:
history = []
if logits_processor is None:
logits_processor = LogitsProcessorList()
logits_processor.append(InvalidScoreLogitsProcessor())
gen_kwargs = {"max_length": max_length, "num_beams": num_beams, "do_sample": do_sample, "top_p": top_p,
"temperature": temperature, "logits_processor": logits_processor, **kwargs}
'''
In [1]: tok.build_chat_input('你好')
Out[1]: {'input_ids': tensor([[64790, 64792, 64795, 30910, 13, 36474, 54591, 64796]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1]]), 'position_ids': tensor([[0, 1, 2, 3, 4, 5, 6, 7]])}
In [2]: tok.decode(_1['input_ids'][0])
Out[2]: '[gMASK]sop<|user|> \n 你好<|assistant|>'
'''
inputs = tokenizer.build_chat_input(query, history=history, role=role)
inputs = inputs.to(self.device)
eos_token_id = [tokenizer.eos_token_id, tokenizer.get_command("<|user|>"),
tokenizer.get_command("<|observation|>")]
outputs = self.generate(**inputs, **gen_kwargs, eos_token_id=eos_token_id)
outputs = outputs.tolist()[0][len(inputs["input_ids"][0]):-1]
response = tokenizer.decode(outputs)
history.append({"role": role, "content": query})
response, history = self.process_response(response, history)
return response, history
def process_response(self, output, history):
content = ""
history = deepcopy(history)
for response in output.split("<|assistant|>"):
metadata, content = response.split("\n", maxsplit=1)
if not metadata.strip():
content = content.strip()
history.append({"role": "assistant", "metadata": metadata, "content": content})
content = content.replace("[[训练时间]]", "2023年")
else:
history.append({"role": "assistant", "metadata": metadata, "content": content})
if history[0]["role"] == "system" and "tools" in history[0]:
content = "\n".join(content.split("\n")[1:-1])
def tool_call(**kwargs):
return kwargs
parameters = eval(content)
content = {"name": metadata.strip(), "parameters": parameters}
else:
content = {"name": metadata.strip(), "content": content}
return content, history
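When the first history entry is a system message that carries a "tools" field, process_response treats the first line of the assistant text as the tool name, drops the first and last lines of the remaining body (typically the code-fence markers), and eval()s what is left through the local tool_call helper to recover keyword arguments. A hypothetical walk-through (the tool name, reply text, and system entry are invented for illustration):

history = [{"role": "system", "content": "Answer with tools when needed.",
            "tools": [{"name": "get_weather"}]}]            # hypothetical tool registry
output = "get_weather\n```python\ntool_call(city='北京')\n```"

# Inside process_response:
#   metadata -> "get_weather" (non-empty, so this turn is treated as a tool call)
#   content  -> body with first/last lines dropped -> "tool_call(city='北京')"
#   eval(content) resolves the local tool_call(**kwargs) -> {'city': '北京'}
# so the method returns roughly:
#   ({'name': 'get_weather', 'parameters': {'city': '北京'}}, history + [assistant entry])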
ChatGLMForConditionalGeneration.stream_chat()
In [19]: q = '你好'
In [23]: it = model.stream_chat(tok, q)
In [24]: for r, his in it: print(repr(r)); print(repr(his))
'\n'
[{'role': 'user', 'content': '你好'}, {'role': 'assistant', 'metadata': '', 'content': ''}]
'\n 你'
[{'role': 'user', 'content': '你好'}, {'role': 'assistant', 'metadata': '', 'content': '你'}]
'\n 你好'
[{'role': 'user', 'content': '你好'}, {'role': 'assistant', 'metadata': '', 'content': '你好'}]
...
'\n 你好👋!很高兴见到你,有什么我可以帮你的吗'
[{'role': 'user', 'content': '你好'}, {'role': 'assistant', 'metadata': '', 'content': '你好👋!很高兴见到你,有什么我可以帮你的吗'}]
'\n 你好👋!很高兴见到你,有什么我可以帮你的吗?'
[{'role': 'user', 'content': '你好'}, {'role': 'assistant', 'metadata': '', 'content': '你好👋!很高兴见到你,有什么我可以帮你的吗?'}]
In [25]: q = '你可以做什么?'
In [26]: it = model.stream_chat(tok, q, history=his)
In [27]: for r, his in it: print(repr(r)); print(repr(his))
'\n'
[{'role': 'user', 'content': '你好'}, {'role': 'assistant', 'metadata': '', 'content': '你好👋!很高兴见到你,有什么我可以帮你的吗?'}, {'role': 'user', 'content': '你可以做什么?'}, {'role': 'assistant', 'metadata': '', 'content': ''}]
'\n 我'
[{'role': 'user', 'content': '你好'}, {'role': 'assistant', 'metadata': '', 'content': '你好👋!很高兴见到你,有什么我可以帮你的吗?'}, {'role': 'user', 'content': '你可以做什么?'}, {'role': 'assistant', 'metadata': '', 'content': '我'}]
'\n 我可以帮助'
...
'\n 我可以帮助你解答各种问题,例如:\n\n* 科学和数学问题\n* 历史和文化问题\n* 技术问题和建议\n* 语言学习\n* 日常交流\n\n以及许多其他主题。如果有什么具体的问题,请随时问我'
[{'role': 'user', 'content': '你好'}, {'role': 'assistant', 'metadata': '', 'content': '你好👋!很高兴见到你,有什么我可以帮你的吗?'}, {'role': 'user', 'content': '你可以做什么?'}, {'role': 'assistant', 'metadata': '', 'content': '我可以帮助你解答各种问题,例如:\n\n* 科学和数学问题\n* 历史和文化问题\n* 技术问题和建议\n* 语言学习\n* 日常交流\n\n以及许多其他主题。如果有什么具体的问题,请随时问我'}]
'\n 我可以帮助你解答各种问题,例如:\n\n* 科学和数学问题\n* 历史和文化问题\n* 技术问题和建议\n* 语言学习\n* 日常交流\n\n以及许多其他主题。如果有什么具体的问题,请随时问我。'
[{'role': 'user', 'content': '你好'}, {'role': 'assistant', 'metadata': '', 'content': '你好👋!很高兴见到你,有什么我可以帮你的吗?'}, {'role': 'user', 'content': '你可以做什么?'}, {'role': 'assistant', 'metadata': '', 'content': '我可以帮助你解答各种问题,例如:\n\n* 科学和数学问题\n* 历史和文化问题\n* 技术问题和建议\n* 语言学习\n* 日常交流\n\n以及许多其他主题。如果有什么具体的问题,请随时问我。'}]
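Each yielded response is the full text decoded so far rather than a delta, as the transcript shows, so a consumer that wants incremental output has to print only the new suffix. A small sketch of that pattern, reusing model and tok from the earlier loading example (the printing strategy itself is an assumption about usage, not library behaviour):

printed = 0
for r, his in model.stream_chat(tok, '你好'):
    print(r[printed:], end='', flush=True)   # emit only the newly decoded suffix
    printed = len(r)
print()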
@torch.inference_mode()
def stream_chat(self, tokenizer, query: str, history: List[Tuple[str, str]] = None, role: str = "user",
past_key_values=None, max_length: int = 32768, do_sample=True, top_p=0.8, temperature=0.8,
logits_processor=None, return_past_key_values=False, **kwargs):
if history is None:
history = []
if logits_processor is None:
logits_processor = LogitsProcessorList()
logits_processor.append(InvalidScoreLogitsProcessor())
eos_token_id = [tokenizer.eos_token_id, tokenizer.get_command("<|user|>"),
tokenizer.get_command("<|observation|>")]
gen_kwargs = {"max_length": max_length, "do_sample": do_sample, "top_p": top_p,
"temperature": temperature, "logits_processor": logits_processor, **kwargs}
'''
In [1]: tok.decode(tok.build_chat_input('Q3',history=[
...: {'role': 'user', 'content': 'Q1'},
...: {'role': 'assistant', 'content': 'A1'},
...: {'role': 'user', 'content': 'Q2'},
...: {'role': 'assistant', 'content': 'A2'},
...: ])['input_ids'][0])
Out[1]: '[gMASK]sop<|user|> \n Q1<|assistant|> \n A1<|user|> \n Q2<|assistant|> \n A2<|user|> \n Q3<|assistant|>'
'''
if past_key_values is None:
inputs = tokenizer.build_chat_input(query, history=history, role=role)
else:
inputs = tokenizer.build_chat_input(query, role=role)
inputs = inputs.to(self.device)
if past_key_values is not None:
past_length = past_key_values[0][0].shape[0]
if self.transformer.pre_seq_len is not None:
past_length -= self.transformer.pre_seq_len
inputs.position_ids += past_length
attention_mask = inputs.attention_mask
attention_mask = torch.cat((attention_mask.new_ones(1, past_length), attention_mask), dim=1)
inputs['attention_mask'] = attention_mask
history.append({"role": role, "content": query})
for outputs in self.stream_generate(**inputs, past_key_values=past_key_values,
eos_token_id=eos_token_id, return_past_key_values=return_past_key_values,
**gen_kwargs):
if return_past_key_values:
outputs, past_key_values = outputs
outputs = outputs.tolist()[0][len(inputs["input_ids"][0]):-1]
response = tokenizer.decode(outputs)
if response and response[-1] != "�":
response, new_history = self.process_response(response, history)
if return_past_key_values:
yield response, new_history, past_key_values
else:
yield response, new_history
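With return_past_key_values=True the generator also yields the KV cache, and the past_key_values branch above tokenizes only the new query, shifting position_ids and extending attention_mask by the cached length. A sketch of a multi-turn loop that reuses the cache in that way (loop structure and variable names are assumptions; only the argument names come from the signature above):

history, past = [], None
for q in ['你好', '你可以做什么?']:
    response = ""
    for response, history, past in model.stream_chat(tok, q, history=history,
                                                     past_key_values=past,
                                                     return_past_key_values=True):
        pass                    # drain the stream; keep the last response/history/cache
    print(response)             # final reply for this turn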