Transformers Source Code Analysis (132)
.\pipelines\pt_utils.py
import numpy as np
import torch
from torch.utils.data import Dataset, IterableDataset
from ..utils.generic import ModelOutput
class PipelineDataset(Dataset):
def __init__(self, dataset, process, params):
self.dataset = dataset
self.process = process
self.params = params
def __len__(self):
return len(self.dataset)
def __getitem__(self, i):
item = self.dataset[i]
processed = self.process(item, **self.params)
return processed
class PipelineIterator(IterableDataset):
def __init__(self, loader, infer, params, loader_batch_size=None):
"""
Roughly equivalent to
```
for item in loader:
yield infer(item, **params)
```
Arguments:
loader (`torch.utils.data.DataLoader` or any iterator):
The iterator that will be used to apply `infer` on.
infer (any function):
The function to apply to each element of `loader`.
params (`dict`):
The parameters passed to `infer` along with every item.
loader_batch_size (`int`, *optional*):
If specified, the items of `loader` are supposed to come as batches and are loader_batched here,
making it roughly behave as
```
for items in loader:
for i in loader_batch_size:
item = items[i]
yield infer(item, **params)
```"""
self.loader = loader
self.infer = infer
self.params = params
if loader_batch_size == 1:
loader_batch_size = None
self.loader_batch_size = loader_batch_size
self._loader_batch_index = None
self._loader_batch_data = None
def __len__(self):
return len(self.loader)
def __iter__(self):
self.iterator = iter(self.loader)
return self
def loader_batch_item(self):
"""
Return item located at `loader_batch_index` within the current `loader_batch_data`.
"""
if isinstance(self._loader_batch_data, torch.Tensor):
result = self._loader_batch_data[self._loader_batch_index].unsqueeze(0)
else:
loader_batched = {}
for k, element in self._loader_batch_data.items():
if isinstance(element, ModelOutput):
element = element.to_tuple()
if isinstance(element[0], torch.Tensor):
loader_batched[k] = tuple(el[self._loader_batch_index].unsqueeze(0) for el in element)
elif isinstance(element[0], np.ndarray):
loader_batched[k] = tuple(np.expand_dims(el[self._loader_batch_index], 0) for el in element)
continue
if k in {"hidden_states", "past_key_values", "attentions"} and isinstance(element, tuple):
if isinstance(element[0], torch.Tensor):
loader_batched[k] = tuple(el[self._loader_batch_index].unsqueeze(0) for el in element)
elif isinstance(element[0], np.ndarray):
loader_batched[k] = tuple(np.expand_dims(el[self._loader_batch_index], 0) for el in element)
continue
if element is None:
loader_batched[k] = None
elif isinstance(element[self._loader_batch_index], torch.Tensor):
loader_batched[k] = element[self._loader_batch_index].unsqueeze(0)
elif isinstance(element[self._loader_batch_index], np.ndarray):
loader_batched[k] = np.expand_dims(element[self._loader_batch_index], 0)
else:
loader_batched[k] = element[self._loader_batch_index]
result = self._loader_batch_data.__class__(loader_batched)
self._loader_batch_index += 1
return result
def __next__(self):
if self._loader_batch_index is not None and self._loader_batch_index < self.loader_batch_size:
return self.loader_batch_item()
item = next(self.iterator)
processed = self.infer(item, **self.params)
if self.loader_batch_size is not None:
if isinstance(processed, torch.Tensor):
first_tensor = processed
else:
key = list(processed.keys())[0]
first_tensor = processed[key]
if isinstance(first_tensor, list):
observed_batch_size = len(first_tensor)
else:
observed_batch_size = first_tensor.shape[0]
if 0 < observed_batch_size < self.loader_batch_size:
self.loader_batch_size = observed_batch_size
self._loader_batch_data = processed
self._loader_batch_index = 0
return self.loader_batch_item()
else:
return processed
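# Illustrative usage sketch (not part of pt_utils.py): a toy `infer` stands in for a
# pipeline forward pass, showing how PipelineIterator re-slices batched outputs so
# downstream code always sees batch_size == 1 items.
from transformers.pipelines.pt_utils import PipelineIterator
import torch
from torch.utils.data import DataLoader

def toy_infer(batch, scale=1.0):
    # Pretend model output: a dict whose tensors have the batch as dimension 0.
    return {"logits": batch * scale}

data = torch.arange(8, dtype=torch.float32).reshape(8, 1)
loader = DataLoader(data, batch_size=4)
it = PipelineIterator(loader, toy_infer, {"scale": 2.0}, loader_batch_size=4)
for item in it:
    print(item["logits"].shape)  # torch.Size([1, 1]) for each of the 8 items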
class PipelineChunkIterator(PipelineIterator):
def __init__(self, loader, infer, params, loader_batch_size=None):
"""
Roughly equivalent to
```
for iterator in loader:
for item in iterator:
yield infer(item, **params)
```
Arguments:
loader (`torch.utils.data.DataLoader` or any iterator):
The iterator that will be used to apply `infer` on.
infer (any function):
The function to apply to each element of `loader`.
params (`dict`):
The parameters passed to `infer` along with every item
"""
super().__init__(loader, infer, params)
def __iter__(self):
self.iterator = iter(self.loader)
self.subiterator = None
return self
def __next__(self):
if self.subiterator is None:
self.subiterator = self.infer(next(self.iterator), **self.params)
try:
processed = next(self.subiterator)
except StopIteration:
self.subiterator = self.infer(next(self.iterator), **self.params)
processed = next(self.subiterator)
return processed
class PipelinePackIterator(PipelineIterator):
"""
Roughly equivalent to
```
packed = []
for item in loader:
packed.append(item)
if item["is_last"]:
yield packed
packed = []
```
but it also handles cases where `item` are batched (meaning it's a dict of Tensor with first dimension > 1). In
that case it does
```
packed = []
for batch in loader:
# item is batched
for item in batch:
packed.append(item)
if item["is_last"]:
yield packed
packed = []
```
Arguments:
loader (`torch.utils.data.DataLoader` or any iterator):
The iterator that will be used to apply `infer` on.
infer (any function):
The function to apply to each element of `loader`.
params (`dict`):
The parameters passed to `infer` along with every item
loader_batch_size (`int`, *optional*):
If specified, the items of `loader` are supposed to come as batch, and are loader_batched here making
it roughly behave as
"""
for items in loader:
for i in loader_batch_size:
item = items[i]
yield infer(item, **params)
def __iter__(self):
self.iterator = iter(self.loader)
return self
def __next__(self):
is_last = False
accumulator = []
if self._loader_batch_index is not None and self._loader_batch_index < self.loader_batch_size:
while self._loader_batch_index < self.loader_batch_size:
item = self.loader_batch_item()
is_last = item.pop("is_last")
accumulator.append(item)
if is_last:
return accumulator
while not is_last:
processed = self.infer(next(self.iterator), **self.params)
if self.loader_batch_size is not None:
if isinstance(processed, torch.Tensor):
first_tensor = processed
else:
key = list(processed.keys())[0]
first_tensor = processed[key]
if isinstance(first_tensor, list):
observed_batch_size = len(first_tensor)
else:
observed_batch_size = first_tensor.shape[0]
if 0 < observed_batch_size < self.loader_batch_size:
self.loader_batch_size = observed_batch_size
self._loader_batch_data = processed
self._loader_batch_index = 0
while self._loader_batch_index < self.loader_batch_size:
item = self.loader_batch_item()
is_last = item.pop("is_last")
accumulator.append(item)
if is_last:
return accumulator
else:
item = processed
is_last = item.pop("is_last")
accumulator.append(item)
return accumulator
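# Illustrative sketch (not part of pt_utils.py): PipelinePackIterator groups chunked
# outputs until an item carries is_last=True, which is how ChunkPipelines reassemble
# per-example results from multiple forward passes.
from transformers.pipelines.pt_utils import PipelinePackIterator

chunks = [
    {"value": 0, "is_last": False},
    {"value": 1, "is_last": True},   # closes the first example
    {"value": 2, "is_last": True},   # a second example made of a single chunk
]
packer = PipelinePackIterator(chunks, infer=lambda x, **kw: x, params={})
print(list(packer))  # [[{'value': 0}, {'value': 1}], [{'value': 2}]]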
class KeyDataset(Dataset):
def __init__(self, dataset: Dataset, key: str):
self.dataset = dataset
self.key = key
def __len__(self):
return len(self.dataset)
def __getitem__(self, i):
return self.dataset[i][self.key]
class KeyPairDataset(Dataset):
def __init__(self, dataset: Dataset, key1: str, key2: str):
self.dataset = dataset
self.key1 = key1
self.key2 = key2
def __len__(self):
return len(self.dataset)
def __getitem__(self, i):
return {"text": self.dataset[i][self.key1], "text_pair": self.dataset[i][self.key2]}
.\pipelines\question_answering.py
import inspect
import types
import warnings
from collections.abc import Iterable
from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union
import numpy as np
from ..data import SquadExample, SquadFeatures, squad_convert_examples_to_features
from ..modelcard import ModelCard
from ..tokenization_utils import PreTrainedTokenizer
from ..utils import (
PaddingStrategy,
add_end_docstrings,
is_tf_available,
is_tokenizers_available,
is_torch_available,
logging,
)
from .base import ArgumentHandler, ChunkPipeline, build_pipeline_init_args
logger = logging.get_logger(__name__)
if TYPE_CHECKING:
from ..modeling_tf_utils import TFPreTrainedModel
from ..modeling_utils import PreTrainedModel
if is_tokenizers_available():
import tokenizers
if is_tf_available():
import tensorflow as tf
from ..models.auto.modeling_tf_auto import TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING_NAMES
Dataset = None
if is_torch_available():
import torch
from torch.utils.data import Dataset
from ..models.auto.modeling_auto import MODEL_FOR_QUESTION_ANSWERING_MAPPING_NAMES
def decode_spans(
start: np.ndarray, end: np.ndarray, topk: int, max_answer_len: int, undesired_tokens: np.ndarray
) -> Tuple:
"""
Take the output of any `ModelForQuestionAnswering` and generate probabilities for each span to be an actual answer.
It also filters out unwanted/impossible cases, such as answers longer than max_answer_len or spans whose end
position comes before their start position. The topk argument allows returning the k best answer spans.
Args:
start (`np.ndarray`): Individual start probabilities for each token.
end (`np.ndarray`): Individual end probabilities for each token.
topk (`int`): Indicates how many possible answer span(s) to extract from the model output.
max_answer_len (`int`): Maximum size of the answer to extract from the model's output.
undesired_tokens (`np.ndarray`): Mask determining tokens that can be part of the answer.
"""
if start.ndim == 1:
start = start[None]
if end.ndim == 1:
end = end[None]
outer = np.matmul(np.expand_dims(start, -1), np.expand_dims(end, 1))
candidates = np.tril(np.triu(outer), max_answer_len - 1)
scores_flat = candidates.flatten()
if topk == 1:
idx_sort = [np.argmax(scores_flat)]
elif len(scores_flat) < topk:
idx_sort = np.argsort(-scores_flat)
else:
idx = np.argpartition(-scores_flat, topk)[0:topk]
idx_sort = idx[np.argsort(-scores_flat[idx])]
starts, ends = np.unravel_index(idx_sort, candidates.shape)[1:]
desired_spans = np.isin(starts, undesired_tokens.nonzero()) & np.isin(ends, undesired_tokens.nonzero())
starts = starts[desired_spans]
ends = ends[desired_spans]
scores = candidates[0, starts, ends]
return starts, ends, scores
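# Worked numeric sketch (constructed values, not part of the original file): on a
# 5-token sequence, the best span under the constraints is tokens 1..2 with score
# roughly 0.70 * 0.60 = 0.42.
import numpy as np
from transformers.pipelines.question_answering import decode_spans

start_probs = np.array([0.05, 0.70, 0.10, 0.10, 0.05])
end_probs = np.array([0.05, 0.10, 0.60, 0.20, 0.05])
answer_mask = np.array([0, 1, 1, 1, 0])  # 1 = token may belong to the answer
starts, ends, scores = decode_spans(start_probs, end_probs, topk=1, max_answer_len=3, undesired_tokens=answer_mask)
print(starts, ends, scores)  # [1] [2] [~0.42]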
def select_starts_ends(
start,
end,
p_mask,
attention_mask,
min_null_score=1000000,
top_k=1,
handle_impossible_answer=False,
max_answer_len=15,
):
"""
Takes the raw output of any `ModelForQuestionAnswering` and first normalizes its outputs and then uses
`decode_spans()` to generate probabilities for each span to be the actual answer.
Args:
start (`np.ndarray`): Individual start logits for each token.
end (`np.ndarray`): Individual end logits for each token.
p_mask (`np.ndarray`): A mask with 1 for values that cannot be in the answer.
attention_mask (`np.ndarray`): The attention mask generated by the tokenizer.
min_null_score (`float`): The minimum null (empty) answer score seen so far.
top_k (`int`): Indicates how many possible answer span(s) to extract from the model output.
handle_impossible_answer (`bool`): Whether to allow null (empty) answers.
max_answer_len (`int`): Maximum size of the answer to extract from the model's output.
"""
undesired_tokens = np.abs(np.array(p_mask) - 1)
if attention_mask is not None:
undesired_tokens = undesired_tokens & attention_mask
undesired_tokens_mask = undesired_tokens == 0.0
start = np.where(undesired_tokens_mask, -10000.0, start)
end = np.where(undesired_tokens_mask, -10000.0, end)
start = np.exp(start - start.max(axis=-1, keepdims=True))
start = start / start.sum()
end = np.exp(end - end.max(axis=-1, keepdims=True))
end = end / end.sum()
if handle_impossible_answer:
min_null_score = min(min_null_score, (start[0, 0] * end[0, 0]).item())
start[0, 0] = end[0, 0] = 0.0
starts, ends, scores = decode_spans(start, end, top_k, max_answer_len, undesired_tokens)
return starts, ends, scores, min_null_score
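# Sketch (constructed logits, not part of the original file): p_mask flags
# question/special tokens and attention_mask flags padding; only tokens 2..4 can be
# part of the answer, and the best span here covers tokens 2..3.
import numpy as np
from transformers.pipelines.question_answering import select_starts_ends

start_logits = np.array([[0.1, 0.2, 2.5, 0.3, 0.1, 0.0]])
end_logits = np.array([[0.1, 0.1, 0.3, 2.0, 0.2, 0.0]])
p_mask = np.array([[1, 1, 0, 0, 0, 1]])
attention_mask = np.array([[1, 1, 1, 1, 1, 0]])
starts, ends, scores, min_null = select_starts_ends(
    start_logits, end_logits, p_mask, attention_mask, top_k=1, max_answer_len=3
)
print(starts, ends)  # [2] [3]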
class QuestionAnsweringArgumentHandler(ArgumentHandler):
"""
QuestionAnsweringPipeline requires the user to provide multiple arguments (i.e. question & context) to be mapped to
internal [`SquadExample`].
QuestionAnsweringArgumentHandler manages all the possible ways to create a [`SquadExample`] from the command-line
supplied arguments.
"""
def normalize(self, item):
if isinstance(item, SquadExample):
return item
elif isinstance(item, dict):
for k in ["question", "context"]:
if k not in item:
raise KeyError("You need to provide a dictionary with keys {question:..., context:...}")
elif item[k] is None:
raise ValueError(f"`{k}` cannot be None")
elif isinstance(item[k], str) and len(item[k]) == 0:
raise ValueError(f"`{k}` cannot be empty")
return QuestionAnsweringPipeline.create_sample(**item)
raise ValueError(f"{item} argument needs to be of type (SquadExample, dict)")
def __call__(self, *args, **kwargs):
if args is not None and len(args) > 0:
if len(args) == 1:
inputs = args[0]
elif len(args) == 2 and {type(el) for el in args} == {str}:
inputs = [{"question": args[0], "context": args[1]}]
else:
inputs = list(args)
elif "X" in kwargs:
inputs = kwargs["X"]
elif "data" in kwargs:
inputs = kwargs["data"]
elif "question" in kwargs and "context" in kwargs:
if isinstance(kwargs["question"], list) and isinstance(kwargs["context"], str):
inputs = [{"question": Q, "context": kwargs["context"]} for Q in kwargs["question"]]
elif isinstance(kwargs["question"], list) and isinstance(kwargs["context"], list):
if len(kwargs["question"]) != len(kwargs["context"]):
raise ValueError("Questions and contexts don't have the same lengths")
inputs = [{"question": Q, "context": C} for Q, C in zip(kwargs["question"], kwargs["context"])]
elif isinstance(kwargs["question"], str) and isinstance(kwargs["context"], str):
inputs = [{"question": kwargs["question"], "context": kwargs["context"]}]
else:
raise ValueError("Arguments can't be understood")
else:
raise ValueError(f"Unknown arguments {kwargs}")
generator_types = (types.GeneratorType, Dataset) if Dataset is not None else (types.GeneratorType,)
if isinstance(inputs, generator_types):
return inputs
if isinstance(inputs, dict):
inputs = [inputs]
elif isinstance(inputs, Iterable):
inputs = list(inputs)
else:
raise ValueError(f"Invalid arguments {kwargs}")
for i, item in enumerate(inputs):
inputs[i] = self.normalize(item)
return inputs
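# Sketch (not part of the original file): the three call conventions below are all
# normalized to a list containing a single SquadExample.
from transformers.pipelines.question_answering import QuestionAnsweringArgumentHandler

handler = QuestionAnsweringArgumentHandler()
q, c = "Where do I live?", "My name is Wolfgang and I live in Berlin"
print(type(handler(question=q, context=c)[0]).__name__)       # SquadExample
print(type(handler({"question": q, "context": c})[0]).__name__)
print(type(handler(q, c)[0]).__name__)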
@add_end_docstrings(build_pipeline_init_args(has_tokenizer=True))
class QuestionAnsweringPipeline(ChunkPipeline):
"""
Question Answering pipeline using any `ModelForQuestionAnswering`. See the [question answering
examples](../task_summary#question-answering) for more information.
Example:
```
>>> from transformers import pipeline
>>> oracle = pipeline(model="deepset/roberta-base-squad2")
>>> oracle(question="Where do I live?", context="My name is Wolfgang and I live in Berlin")
{'score': 0.9191, 'start': 34, 'end': 40, 'answer': 'Berlin'}
```
Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial)
This question answering pipeline can currently be loaded from [`pipeline`] using the following task identifier:
`"question-answering"`.
The models that this pipeline can use are models that have been fine-tuned on a question answering task. See the
up-to-date list of available models on
[huggingface.co/models](https://huggingface.co/models?filter=question-answering).
"""
default_input_names = "question,context"
handle_impossible_answer = False
def __init__(
self,
model: Union["PreTrainedModel", "TFPreTrainedModel"],
tokenizer: PreTrainedTokenizer,
modelcard: Optional[ModelCard] = None,
framework: Optional[str] = None,
task: str = "",
**kwargs,
):
"""
Initialize the QuestionAnsweringPipeline object.
Args:
model (Union["PreTrainedModel", "TFPreTrainedModel"]): The pre-trained model to use for question answering.
tokenizer (PreTrainedTokenizer): Tokenizer associated with the model for tokenizing inputs.
modelcard (Optional[ModelCard]): Model card containing details about the model (optional).
framework (Optional[str]): The framework for the model, e.g., "tf" or "pt" (optional).
task (str): Identifier for the task associated with the pipeline (optional).
**kwargs: Additional keyword arguments passed to parent class constructor.
"""
super().__init__(
model=model,
tokenizer=tokenizer,
modelcard=modelcard,
framework=framework,
task=task,
**kwargs,
)
self._args_parser = QuestionAnsweringArgumentHandler()
self.check_model_type(
TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING_NAMES
if self.framework == "tf"
else MODEL_FOR_QUESTION_ANSWERING_MAPPING_NAMES
)
@staticmethod
def create_sample(
question: Union[str, List[str]], context: Union[str, List[str]]
) -> Union[SquadExample, List[SquadExample]]:
"""
Create a SquadExample or a list of SquadExamples from given questions and contexts.
This helper method encapsulates the logic for converting questions and contexts into SquadExample instances.
Args:
question (Union[str, List[str]]): The question or list of questions.
context (Union[str, List[str]]): The context or list of contexts.
Returns:
Union[SquadExample, List[SquadExample]]: The corresponding SquadExample or list of SquadExamples.
"""
if isinstance(question, list):
return [SquadExample(None, q, c, None, None, None) for q, c in zip(question, context)]
else:
return SquadExample(None, question, context, None, None, None)
def _sanitize_parameters(
self,
padding=None,
topk=None,
top_k=None,
doc_stride=None,
max_answer_len=None,
max_seq_len=None,
max_question_len=None,
handle_impossible_answer=None,
align_to_words=None,
**kwargs,
):
preprocess_params = {}
if padding is not None:
preprocess_params["padding"] = padding
if doc_stride is not None:
preprocess_params["doc_stride"] = doc_stride
if max_question_len is not None:
preprocess_params["max_question_len"] = max_question_len
if max_seq_len is not None:
preprocess_params["max_seq_len"] = max_seq_len
postprocess_params = {}
if topk is not None and top_k is None:
warnings.warn("topk parameter is deprecated, use top_k instead", UserWarning)
top_k = topk
if top_k is not None:
if top_k < 1:
raise ValueError(f"top_k parameter should be >= 1 (got {top_k})")
postprocess_params["top_k"] = top_k
if max_answer_len is not None:
if max_answer_len < 1:
raise ValueError(f"max_answer_len parameter should be >= 1 (got {max_answer_len}")
postprocess_params["max_answer_len"] = max_answer_len
if handle_impossible_answer is not None:
postprocess_params["handle_impossible_answer"] = handle_impossible_answer
if align_to_words is not None:
postprocess_params["align_to_words"] = align_to_words
return preprocess_params, {}, postprocess_params
def _forward(self, inputs):
example = inputs["example"]
model_inputs = {k: inputs[k] for k in self.tokenizer.model_input_names}
model_forward = self.model.forward if self.framework == "pt" else self.model.call
if "use_cache" in inspect.signature(model_forward).parameters.keys():
model_inputs["use_cache"] = False
output = self.model(**model_inputs)
if isinstance(output, dict):
return {"start": output["start_logits"], "end": output["end_logits"], "example": example, **inputs}
else:
start, end = output[:2]
return {"start": start, "end": end, "example": example, **inputs}
def postprocess(
self,
model_outputs,
top_k=1,
handle_impossible_answer=False,
max_answer_len=15,
align_to_words=True,
):
def get_indices(
self, enc: "tokenizers.Encoding", s: int, e: int, sequence_index: int, align_to_words: bool
) -> Tuple[int, int]:
if align_to_words:
try:
start_word = enc.token_to_word(s)
end_word = enc.token_to_word(e)
start_index = enc.word_to_chars(start_word, sequence_index=sequence_index)[0]
end_index = enc.word_to_chars(end_word, sequence_index=sequence_index)[1]
except Exception:
start_index = enc.offsets[s][0]
end_index = enc.offsets[e][1]
else:
start_index = enc.offsets[s][0]
end_index = enc.offsets[e][1]
return start_index, end_index
def span_to_answer(self, text: str, start: int, end: int) -> Dict[str, Union[str, int]]:
"""
When decoding from token probabilities, this method maps token indexes back to the actual words in the initial context.
Args:
text (`str`): The actual context to extract the answer from.
start (`int`): The answer starting token index.
end (`int`): The answer end token index.
Returns:
Dictionary like `{'answer': str, 'start': int, 'end': int}`.
"""
words = []
token_idx = char_start_idx = char_end_idx = chars_idx = 0
for i, word in enumerate(text.split(" ")):
token = self.tokenizer.tokenize(word)
if start <= token_idx <= end:
if token_idx == start:
char_start_idx = chars_idx
if token_idx == end:
char_end_idx = chars_idx + len(word)
words += [word]
if token_idx > end:
break
token_idx += len(token)
chars_idx += len(word) + 1
return {
"answer": " ".join(words),
"start": max(0, char_start_idx),
"end": min(len(text), char_end_idx),
}
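# Usage sketch (downloads the checkpoint named in the class docstring above)
# exercising the postprocessing knobs routed through _sanitize_parameters.
from transformers import pipeline

qa = pipeline("question-answering", model="deepset/roberta-base-squad2")
print(
    qa(
        question="Where do I live?",
        context="My name is Wolfgang and I live in Berlin",
        top_k=2,                        # return the two best spans
        max_answer_len=10,              # cap the span length (in tokens)
        handle_impossible_answer=True,  # also track the null-answer score
    )
)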
.\pipelines\table_question_answering.py
import collections
import types
import numpy as np
from ..utils import (
add_end_docstrings,
is_tf_available,
is_torch_available,
requires_backends,
)
from .base import (
ArgumentHandler,
Dataset,
Pipeline,
PipelineException,
build_pipeline_init_args,
)
if is_torch_available():
import torch
from ..models.auto.modeling_auto import (
MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES,
MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING_NAMES,
)
if is_tf_available():
import tensorflow as tf
from ..models.auto.modeling_tf_auto import (
TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES,
TF_MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING_NAMES,
)
class TableQuestionAnsweringArgumentHandler(ArgumentHandler):
"""
Handles arguments for the TableQuestionAnsweringPipeline.
"""
def __call__(self, table=None, query=None, **kwargs):
requires_backends(self, "pandas")
import pandas as pd
if table is None:
raise ValueError("Keyword argument `table` cannot be None.")
elif query is None:
if isinstance(table, dict) and table.get("query") is not None and table.get("table") is not None:
tqa_pipeline_inputs = [table]
elif isinstance(table, list) and len(table) > 0:
if not all(isinstance(d, dict) for d in table):
raise ValueError(
f"Keyword argument `table` should be a list of dict, but is {(type(d) for d in table)}"
)
if table[0].get("query") is not None and table[0].get("table") is not None:
tqa_pipeline_inputs = table
else:
raise ValueError(
"If keyword argument `table` is a list of dictionaries, each dictionary should have a `table`"
f" and `query` key, but only dictionary has keys {table[0].keys()} `table` and `query` keys."
)
elif Dataset is not None and isinstance(table, Dataset) or isinstance(table, types.GeneratorType):
return table
else:
raise ValueError(
"Invalid input. Keyword argument `table` should be either of type `dict` or `list`, but "
f"is {type(table)})"
)
else:
tqa_pipeline_inputs = [{"table": table, "query": query}]
for tqa_pipeline_input in tqa_pipeline_inputs:
if not isinstance(tqa_pipeline_input["table"], pd.DataFrame):
if tqa_pipeline_input["table"] is None:
raise ValueError("Table cannot be None.")
tqa_pipeline_input["table"] = pd.DataFrame(tqa_pipeline_input["table"])
return tqa_pipeline_inputs
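# Sketch (not part of the original file, requires pandas): both call forms below are
# normalized to a list of {"table": DataFrame, "query": str} dicts.
from transformers.pipelines.table_question_answering import TableQuestionAnsweringArgumentHandler

handler = TableQuestionAnsweringArgumentHandler()
table = {"Repository": ["Transformers", "Datasets"], "Stars": ["36542", "4512"]}
query = "How many stars does the transformers repository have?"
print(handler(table=table, query=query)[0]["table"].shape)                  # (2, 2)
print(handler(table=[{"table": table, "query": query}])[0]["table"].shape)  # (2, 2)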
@add_end_docstrings(build_pipeline_init_args(has_tokenizer=True))
class TableQuestionAnsweringPipeline(Pipeline):
"""
Table Question Answering pipeline using a `ModelForTableQuestionAnswering`. This pipeline is only available in
PyTorch.
Example:
```
>>> from transformers import pipeline
>>> oracle = pipeline(model="google/tapas-base-finetuned-wtq")
>>> table = {
... "Repository": ["Transformers", "Datasets", "Tokenizers"],
... "Stars": ["36542", "4512", "3934"],
... "Contributors": ["651", "77", "34"],
... "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
... }
>>> oracle(query="How many stars does the transformers repository have?", table=table)
{'answer': 'AVERAGE > 36542', 'coordinates': [(0, 1)], 'cells': ['36542'], 'aggregator': 'AVERAGE'}
```
Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial)
This tabular question answering pipeline can currently be loaded from [`pipeline`] using the following task
identifier: `"table-question-answering"`.
The models that this pipeline can use are models that have been fine-tuned on a tabular question answering task.
See the up-to-date list of available models on
[huggingface.co/models](https://huggingface.co/models?filter=table-question-answering).
"""
default_input_names = "table,query"
def __init__(self, args_parser=TableQuestionAnsweringArgumentHandler(), *args, **kwargs):
super().__init__(*args, **kwargs)
self._args_parser = args_parser
if self.framework == "tf":
mapping = TF_MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING_NAMES.copy()
mapping.update(TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES)
else:
mapping = MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING_NAMES.copy()
mapping.update(MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES)
self.check_model_type(mapping)
self.aggregate = bool(getattr(self.model.config, "aggregation_labels", None)) and bool(
getattr(self.model.config, "num_aggregation_labels", None)
)
self.type = "tapas" if hasattr(self.model.config, "aggregation_labels") else None
def batch_inference(self, **inputs):
return self.model(**inputs)
def _sanitize_parameters(self, sequential=None, padding=None, truncation=None, **kwargs):
preprocess_params = {}
if padding is not None:
preprocess_params["padding"] = padding
if truncation is not None:
preprocess_params["truncation"] = truncation
forward_params = {}
if sequential is not None:
forward_params["sequential"] = sequential
return preprocess_params, forward_params, {}
def preprocess(self, pipeline_input, sequential=None, padding=True, truncation=None):
if truncation is None:
if self.type == "tapas":
truncation = "drop_rows_to_fit"
else:
truncation = "do_not_truncate"
table, query = pipeline_input["table"], pipeline_input["query"]
if table.empty:
raise ValueError("table is empty")
if query is None or query == "":
raise ValueError("query is empty")
inputs = self.tokenizer(table, query, return_tensors=self.framework, truncation=truncation, padding=padding)
inputs["table"] = table
return inputs
def _forward(self, model_inputs, sequential=False, **generate_kwargs):
table = model_inputs.pop("table")
if self.type == "tapas":
if sequential:
outputs = self.sequential_inference(**model_inputs)
else:
outputs = self.batch_inference(**model_inputs)
else:
outputs = self.model.generate(**model_inputs, **generate_kwargs)
model_outputs = {"model_inputs": model_inputs, "table": table, "outputs": outputs}
return model_outputs
def postprocess(self, model_outputs):
inputs = model_outputs["model_inputs"]
table = model_outputs["table"]
outputs = model_outputs["outputs"]
if self.type == "tapas":
if self.aggregate:
logits, logits_agg = outputs[:2]
predictions = self.tokenizer.convert_logits_to_predictions(inputs, logits, logits_agg)
answer_coordinates_batch, agg_predictions = predictions
aggregators = {i: self.model.config.aggregation_labels[pred] for i, pred in enumerate(agg_predictions)}
no_agg_label_index = self.model.config.no_aggregation_label_index
aggregators_prefix = {
i: aggregators[i] + " > " for i, pred in enumerate(agg_predictions) if pred != no_agg_label_index
}
else:
logits = outputs[0]
predictions = self.tokenizer.convert_logits_to_predictions(inputs, logits)
answer_coordinates_batch = predictions[0]
aggregators = {}
aggregators_prefix = {}
answers = []
for index, coordinates in enumerate(answer_coordinates_batch):
cells = [table.iat[coordinate] for coordinate in coordinates]
aggregator = aggregators.get(index, "")
aggregator_prefix = aggregators_prefix.get(index, "")
answer = {
"answer": aggregator_prefix + ", ".join(cells),
"coordinates": coordinates,
"cells": [table.iat[coordinate] for coordinate in coordinates],
}
if aggregator:
answer["aggregator"] = aggregator
answers.append(answer)
if len(answers) == 0:
raise PipelineException("Empty answer")
else:
answers = [{"answer": answer} for answer in self.tokenizer.batch_decode(outputs, skip_special_tokens=True)]
return answers if len(answers) > 1 else answers[0]
.\pipelines\text2text_generation.py
import enum
import warnings
from ..tokenization_utils import TruncationStrategy
from ..utils import add_end_docstrings, is_tf_available, is_torch_available, logging
from .base import Pipeline, build_pipeline_init_args
if is_tf_available():
import tensorflow as tf
from ..models.auto.modeling_tf_auto import TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES
if is_torch_available():
from ..models.auto.modeling_auto import MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES
logger = logging.get_logger(__name__)
class ReturnType(enum.Enum):
TENSORS = 0
TEXT = 1
@add_end_docstrings(build_pipeline_init_args(has_tokenizer=True))
class Text2TextGenerationPipeline(Pipeline):
"""
Pipeline for text to text generation using seq2seq models.
Example:
```
>>> from transformers import pipeline
>>> generator = pipeline(model="mrm8488/t5-base-finetuned-question-generation-ap")
>>> generator(
... "answer: Manuel context: Manuel has created RuPERTa-base with the support of HF-Transformers and Google"
... )
[{'generated_text': 'question: Who created the RuPERTa-base?'}]
```
Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial). You can pass text
generation parameters to this pipeline to control stopping criteria, decoding strategy, and more. Learn more about
text generation parameters in [Text generation strategies](../generation_strategies) and [Text
generation](text_generation).
This Text2TextGenerationPipeline pipeline can currently be loaded from [`pipeline`] using the following task
identifier: `"text2text-generation"`.
The models that this pipeline can use are models that have been fine-tuned on a translation task. See the
up-to-date list of available models on
[huggingface.co/models](https://huggingface.co/models?filter=text2text-generation). For a list of available
parameters, see the [following
documentation](https://huggingface.co/docs/transformers/en/main_classes/text_generation#transformers.generation.GenerationMixin.generate)
Usage:
```
text2text_generator = pipeline("text2text-generation")
text2text_generator("question: What is 42 ? context: 42 is the answer to life, the universe and everything")
```
"""
return_name = "generated"
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.check_model_type(
TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES
if self.framework == "tf"
else MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES
)
def _sanitize_parameters(
self,
return_tensors=None,
return_text=None,
return_type=None,
clean_up_tokenization_spaces=None,
truncation=None,
stop_sequence=None,
**generate_kwargs,
):
preprocess_params = {}
if truncation is not None:
preprocess_params["truncation"] = truncation
forward_params = generate_kwargs
postprocess_params = {}
if return_tensors is not None and return_type is None:
return_type = ReturnType.TENSORS if return_tensors else ReturnType.TEXT
if return_type is not None:
postprocess_params["return_type"] = return_type
if clean_up_tokenization_spaces is not None:
postprocess_params["clean_up_tokenization_spaces"] = clean_up_tokenization_spaces
if stop_sequence is not None:
stop_sequence_ids = self.tokenizer.encode(stop_sequence, add_special_tokens=False)
if len(stop_sequence_ids) > 1:
warnings.warn(
"Stopping on a multiple token sequence is not yet supported on transformers. The first token of"
" the stop sequence will be used as the stop sequence string in the interim."
)
generate_kwargs["eos_token_id"] = stop_sequence_ids[0]
return preprocess_params, forward_params, postprocess_params
def check_inputs(self, input_length: int, min_length: int, max_length: int):
"""
Checks whether there might be something wrong with given input with regard to the model.
"""
return True
def _parse_and_tokenize(self, *args, truncation):
prefix = self.model.config.prefix if self.model.config.prefix is not None else ""
if isinstance(args[0], list):
if self.tokenizer.pad_token_id is None:
raise ValueError("Please make sure that the tokenizer has a pad_token_id when using a batch input")
args = ([prefix + arg for arg in args[0]],)
padding = True
elif isinstance(args[0], str):
args = (prefix + args[0],)
padding = False
else:
raise ValueError(
f" `args[0]`: {args[0]} have the wrong format. The should be either of type `str` or type `list`"
)
inputs = self.tokenizer(*args, padding=padding, truncation=truncation, return_tensors=self.framework)
if "token_type_ids" in inputs:
del inputs["token_type_ids"]
return inputs
def __call__(self, *args, **kwargs):
r"""
Generate the output text(s) using text(s) given as inputs.
Args:
args (`str` or `List[str]`):
Input text for the encoder.
return_tensors (`bool`, *optional*, defaults to `False`):
Whether or not to include the tensors of predictions (as token indices) in the outputs.
return_text (`bool`, *optional*, defaults to `True`):
Whether or not to include the decoded texts in the outputs.
clean_up_tokenization_spaces (`bool`, *optional*, defaults to `False`):
Whether or not to clean up the potential extra spaces in the text output.
truncation (`TruncationStrategy`, *optional*, defaults to `TruncationStrategy.DO_NOT_TRUNCATE`):
The truncation strategy for the tokenization within the pipeline. `TruncationStrategy.DO_NOT_TRUNCATE`
(default) will never truncate, but it is sometimes desirable to truncate the input to fit the model's
max_length instead of throwing an error down the line.
generate_kwargs:
Additional keyword arguments to pass along to the generate method of the model (see the generate method
corresponding to your framework [here](./model#generative-models)).
Return:
A list or a list of list of `dict`: Each result comes as a dictionary with the following keys:
- **generated_text** (`str`, present when `return_text=True`) -- The generated text.
- **generated_token_ids** (`torch.Tensor` or `tf.Tensor`, present when `return_tensors=True`) -- The token
ids of the generated text.
"""
result = super().__call__(*args, **kwargs)
if (
isinstance(args[0], list)
and all(isinstance(el, str) for el in args[0])
and all(len(res) == 1 for res in result)
):
return [res[0] for res in result]
return result
def preprocess(self, inputs, truncation=TruncationStrategy.DO_NOT_TRUNCATE, **kwargs):
inputs = self._parse_and_tokenize(inputs, truncation=truncation, **kwargs)
return inputs
def _forward(self, model_inputs, **generate_kwargs):
if self.framework == "pt":
in_b, input_length = model_inputs["input_ids"].shape
elif self.framework == "tf":
in_b, input_length = tf.shape(model_inputs["input_ids"]).numpy()
self.check_inputs(
input_length,
generate_kwargs.get("min_length", self.model.config.min_length),
generate_kwargs.get("max_length", self.model.config.max_length),
)
output_ids = self.model.generate(**model_inputs, **generate_kwargs)
out_b = output_ids.shape[0]
if self.framework == "pt":
output_ids = output_ids.reshape(in_b, out_b // in_b, *output_ids.shape[1:])
elif self.framework == "tf":
output_ids = tf.reshape(output_ids, (in_b, out_b // in_b, *output_ids.shape[1:]))
return {"output_ids": output_ids}
def postprocess(self, model_outputs, return_type=ReturnType.TEXT, clean_up_tokenization_spaces=False):
records = []
for output_ids in model_outputs["output_ids"][0]:
if return_type == ReturnType.TENSORS:
record = {f"{self.return_name}_token_ids": output_ids}
elif return_type == ReturnType.TEXT:
record = {
f"{self.return_name}_text": self.tokenizer.decode(
output_ids,
skip_special_tokens=True,
clean_up_tokenization_spaces=clean_up_tokenization_spaces,
)
}
records.append(record)
return records
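# Usage sketch (checkpoint from the class docstring above) showing the knobs handled
# by _sanitize_parameters/postprocess: truncation at preprocessing time and token-id
# output instead of decoded text.
from transformers import pipeline

generator = pipeline("text2text-generation", model="mrm8488/t5-base-finetuned-question-generation-ap")
out = generator(
    "answer: Manuel context: Manuel has created RuPERTa-base with the support of HF-Transformers and Google",
    truncation=True,
    return_tensors=True,
)
print(out[0].keys())  # dict_keys(['generated_token_ids'])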
class SummarizationPipeline(Text2TextGenerationPipeline):
"""
Summarize news articles and other documents.
This summarizing pipeline can currently be loaded from [`pipeline`] using the following task identifier:
`"summarization"`.
The models that this pipeline can use are models that have been fine-tuned on a summarization task, which is
currently, '*bart-large-cnn*', '*google-t5/t5-small*', '*google-t5/t5-base*', '*google-t5/t5-large*', '*google-t5/t5-3b*', '*google-t5/t5-11b*'. See the up-to-date
list of available models on [huggingface.co/models](https://huggingface.co/models?filter=summarization). For a list
of available parameters, see the [following
documentation](https://huggingface.co/docs/transformers/en/main_classes/text_generation#transformers.generation.GenerationMixin.generate)
Usage:
```
# use bart in pytorch
summarizer = pipeline("summarization")
summarizer("An apple a day, keeps the doctor away", min_length=5, max_length=20)
# use t5 in tf
summarizer = pipeline("summarization", model="google-t5/t5-base", tokenizer="google-t5/t5-base", framework="tf")
summarizer("An apple a day, keeps the doctor away", min_length=5, max_length=20)
```
"""
return_name = "summary"
def __call__(self, *args, **kwargs):
r"""
Summarize the text(s) given as inputs.
Args:
documents (*str* or `List[str]`):
One or several articles (or one list of articles) to summarize.
return_text (`bool`, *optional*, defaults to `True`):
Whether or not to include the decoded texts in the outputs
return_tensors (`bool`, *optional*, defaults to `False`):
Whether or not to include the tensors of predictions (as token indices) in the outputs.
clean_up_tokenization_spaces (`bool`, *optional*, defaults to `False`):
Whether or not to clean up the potential extra spaces in the text output.
generate_kwargs:
Additional keyword arguments to pass along to the generate method of the model (see the generate method
corresponding to your framework [here](./model#generative-models)).
Return:
A list or a list of list of `dict`: Each result comes as a dictionary with the following keys:
- **summary_text** (`str`, present when `return_text=True`) -- The summary of the corresponding input.
- **summary_token_ids** (`torch.Tensor` or `tf.Tensor`, present when `return_tensors=True`) -- The token
ids of the summary.
"""
return super().__call__(*args, **kwargs)
def check_inputs(self, input_length: int, min_length: int, max_length: int) -> bool:
"""
Checks whether there might be something wrong with given input with regard to the model.
"""
if max_length < min_length:
logger.warning(f"Your min_length={min_length} must be inferior than your max_length={max_length}.")
if input_length < max_length:
logger.warning(
f"Your max_length is set to {max_length}, but your input_length is only {input_length}. Since this is "
"a summarization task, where outputs shorter than the input are typically wanted, you might "
f"consider decreasing max_length manually, e.g. summarizer('...', max_length={input_length//2})"
)
@add_end_docstrings(build_pipeline_init_args(has_tokenizer=True))
class TranslationPipeline(Text2TextGenerationPipeline):
"""
Translates from one language to another.
This translation pipeline can currently be loaded from [`pipeline`] using the following task identifier:
`"translation_xx_to_yy"`.
The models that this pipeline can use are models that have been fine-tuned on a translation task. See the
up-to-date list of available models on [huggingface.co/models](https://huggingface.co/models?filter=translation).
For a list of available parameters, see the [following
documentation](https://huggingface.co/docs/transformers/en/main_classes/text_generation#transformers.generation.GenerationMixin.generate)
Usage:
```
en_fr_translator = pipeline("translation_en_to_fr")
en_fr_translator("How old are you?")
```
"""
return_name = "translation"
def check_inputs(self, input_length: int, min_length: int, max_length: int):
if input_length > 0.9 * max_length:
logger.warning(
f"Your input_length: {input_length} is bigger than 0.9 * max_length: {max_length}. You might consider "
"increasing your max_length manually, e.g. translator('...', max_length=400)"
)
return True
def preprocess(self, *args, truncation=TruncationStrategy.DO_NOT_TRUNCATE, src_lang=None, tgt_lang=None):
if getattr(self.tokenizer, "_build_translation_inputs", None):
return self.tokenizer._build_translation_inputs(
*args, return_tensors=self.framework, truncation=truncation, src_lang=src_lang, tgt_lang=tgt_lang
)
else:
return super()._parse_and_tokenize(*args, truncation=truncation)
def _sanitize_parameters(self, src_lang=None, tgt_lang=None, **kwargs):
preprocess_params, forward_params, postprocess_params = super()._sanitize_parameters(**kwargs)
if src_lang is not None:
preprocess_params["src_lang"] = src_lang
if tgt_lang is not None:
preprocess_params["tgt_lang"] = tgt_lang
if src_lang is None and tgt_lang is None:
task = kwargs.get("task", self.task)
items = task.split("_")
if task and len(items) == 4:
preprocess_params["src_lang"] = items[1]
preprocess_params["tgt_lang"] = items[3]
return preprocess_params, forward_params, postprocess_params
def __call__(self, *args, **kwargs):
r"""
Translate the text(s) given as inputs.
Args:
args (`str` or `List[str]`):
Texts to be translated.
return_tensors (`bool`, *optional*, defaults to `False`):
Whether or not to include the tensors of predictions (as token indices) in the outputs.
return_text (`bool`, *optional*, defaults to `True`):
Whether or not to include the decoded texts in the outputs.
clean_up_tokenization_spaces (`bool`, *optional*, defaults to `False`):
Whether or not to clean up the potential extra spaces in the text output.
src_lang (`str`, *optional*):
The language of the input. Might be required for multilingual models. Will not have any effect for
single pair translation models
tgt_lang (`str`, *optional*):
The language of the desired output. Might be required for multilingual models. Will not have any effect
for single pair translation models
generate_kwargs:
Additional keyword arguments to pass along to the generate method of the model (see the generate method
corresponding to your framework [here](./model#generative-models)).
Return:
A list or a list of list of `dict`: Each result comes as a dictionary with the following keys:
- **translation_text** (`str`, present when `return_text=True`) -- The translation.
- **translation_token_ids** (`torch.Tensor` or `tf.Tensor`, present when `return_tensors=True`) -- The
token ids of the translation.
"""
return super().__call__(*args, **kwargs)
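# Usage sketch: when src_lang/tgt_lang are not given, _sanitize_parameters parses
# them from a task name of the form "translation_xx_to_yy"; multilingual checkpoints
# can take them explicitly instead. This follows the usage shown in the class
# docstring; the default checkpoint for this task is a T5 model.
from transformers import pipeline

en_fr_translator = pipeline("translation_en_to_fr")
print(en_fr_translator("How old are you?"))  # [{'translation_text': ...}]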
.\pipelines\text_classification.py
import inspect
import warnings
from typing import Dict
import numpy as np
from ..utils import ExplicitEnum, add_end_docstrings, is_tf_available, is_torch_available
from .base import GenericTensor, Pipeline, build_pipeline_init_args
if is_tf_available():
from ..models.auto.modeling_tf_auto import TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES
if is_torch_available():
from ..models.auto.modeling_auto import MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES
def sigmoid(_outputs):
return 1.0 / (1.0 + np.exp(-_outputs))
def softmax(_outputs):
maxes = np.max(_outputs, axis=-1, keepdims=True)
shifted_exp = np.exp(_outputs - maxes)
return shifted_exp / shifted_exp.sum(axis=-1, keepdims=True)
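# Tiny numeric check (constructed values, not part of the original file) of the two
# activation helpers above: sigmoid is applied element-wise (multi-label case), while
# softmax normalizes each row to sum to 1 (single-label case).
import numpy as np

logits = np.array([[1.0, 2.0, 0.5]])
print(sigmoid(logits))               # element-wise values in (0, 1)
print(softmax(logits))               # one probability distribution per row
print(softmax(logits).sum(axis=-1))  # [1.]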
class ClassificationFunction(ExplicitEnum):
SIGMOID = "sigmoid"
SOFTMAX = "softmax"
NONE = "none"
@add_end_docstrings(
build_pipeline_init_args(has_tokenizer=True),
r"""
return_all_scores (`bool`, *optional*, defaults to `False`):
Whether to return all prediction scores or just the one of the predicted class.
function_to_apply (`str`, *optional*, defaults to `"default"`):
The function to apply to the model outputs in order to retrieve the scores. Accepts four different values:
- `"default"`: if the model has a single label, will apply the sigmoid function on the output. If the model
has several labels, will apply the softmax function on the output.
- `"sigmoid"`: Applies the sigmoid function on the output.
- `"softmax"`: Applies the softmax function on the output.
- `"none"`: Does not apply any function on the output.""",
)
class TextClassificationPipeline(Pipeline):
"""
Text classification pipeline using any `ModelForSequenceClassification`. See the [sequence classification
examples](../task_summary#sequence-classification) for more information.
Example:
```
>>> from transformers import pipeline
>>> classifier = pipeline(model="distilbert/distilbert-base-uncased-finetuned-sst-2-english")
>>> classifier("This movie is disgustingly good !")
[{'label': 'POSITIVE', 'score': 1.0}]
>>> classifier("Director tried too much.")
[{'label': 'NEGATIVE', 'score': 0.996}]
```
Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial)
This text classification pipeline can currently be loaded from [`pipeline`] using the following task identifier:
`"sentiment-analysis"` (for classifying sequences according to positive or negative sentiments).
If multiple classification labels are available (`model.config.num_labels >= 2`), the pipeline will run a softmax
over the results. If there is a single label, the pipeline will run a sigmoid over the result.
The models that this pipeline can use are models that have been fine-tuned on a sequence classification task. See
"""
"""
the up-to-date list of available models on
[huggingface.co/models](https://huggingface.co/models?filter=text-classification).
"""
return_all_scores = False
function_to_apply = ClassificationFunction.NONE
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.check_model_type(
TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES
if self.framework == "tf"
else MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES
)
def _sanitize_parameters(self, return_all_scores=None, function_to_apply=None, top_k="", **tokenizer_kwargs):
preprocess_params = tokenizer_kwargs
postprocess_params = {}
if hasattr(self.model.config, "return_all_scores") and return_all_scores is None:
return_all_scores = self.model.config.return_all_scores
if isinstance(top_k, int) or top_k is None:
postprocess_params["top_k"] = top_k
postprocess_params["_legacy"] = False
elif return_all_scores is not None:
warnings.warn(
"`return_all_scores` is now deprecated, if want a similar functionality use `top_k=None` instead of"
" `return_all_scores=True` or `top_k=1` instead of `return_all_scores=False`.",
UserWarning,
)
if return_all_scores:
postprocess_params["top_k"] = None
else:
postprocess_params["top_k"] = 1
if isinstance(function_to_apply, str):
function_to_apply = ClassificationFunction[function_to_apply.upper()]
if function_to_apply is not None:
postprocess_params["function_to_apply"] = function_to_apply
return preprocess_params, {}, postprocess_params
def __call__(self, inputs, **kwargs):
"""
Classify the text(s) given as inputs.
Args:
inputs (`str` or `List[str]` or `Dict[str]`, or `List[Dict[str]]`):
One or several texts to classify. In order to use text pairs for your classification, you can send a
dictionary containing `{"text", "text_pair"}` keys, or a list of those.
top_k (`int`, *optional*, defaults to `1`):
How many results to return.
function_to_apply (`str`, *optional*, defaults to `"default"`):
The function to apply to the model outputs in order to retrieve the scores. Accepts four different
values:
If this argument is not specified, then it will apply the following functions according to the number
of labels:
- If the model has a single label, will apply the sigmoid function on the output.
- If the model has several labels, will apply the softmax function on the output.
Possible values are:
- `"sigmoid"`: Applies the sigmoid function on the output.
- `"softmax"`: Applies the softmax function on the output.
- `"none"`: Does not apply any function on the output.
Return:
A list or a list of list of `dict`: Each result comes as list of dictionaries with the following keys:
- **label** (`str`) -- The label predicted.
- **score** (`float`) -- The corresponding probability.
If `top_k` is used, one such dictionary is returned per label.
"""
inputs = (inputs,)
result = super().__call__(*inputs, **kwargs)
_legacy = "top_k" not in kwargs
if isinstance(inputs[0], str) and _legacy:
return [result]
else:
return result
def preprocess(self, inputs, **tokenizer_kwargs) -> Dict[str, GenericTensor]:
return_tensors = self.framework
if isinstance(inputs, dict):
return self.tokenizer(**inputs, return_tensors=return_tensors, **tokenizer_kwargs)
elif isinstance(inputs, list) and len(inputs) == 1 and isinstance(inputs[0], list) and len(inputs[0]) == 2:
return self.tokenizer(
text=inputs[0][0], text_pair=inputs[0][1], return_tensors=return_tensors, **tokenizer_kwargs
)
elif isinstance(inputs, list):
raise ValueError(
"The pipeline received invalid inputs, if you are trying to send text pairs, you can try to send a"
' dictionary `{"text": "My text", "text_pair": "My pair"}` in order to send a text pair.'
)
return self.tokenizer(inputs, return_tensors=return_tensors, **tokenizer_kwargs)
def _forward(self, model_inputs):
model_forward = self.model.forward if self.framework == "pt" else self.model.call
if "use_cache" in inspect.signature(model_forward).parameters.keys():
model_inputs["use_cache"] = False
return self.model(**model_inputs)
def postprocess(self, model_outputs, function_to_apply=None, top_k=1, _legacy=True):
if function_to_apply is None:
if self.model.config.problem_type == "multi_label_classification" or self.model.config.num_labels == 1:
function_to_apply = ClassificationFunction.SIGMOID
elif self.model.config.problem_type == "single_label_classification" or self.model.config.num_labels > 1:
function_to_apply = ClassificationFunction.SOFTMAX
elif hasattr(self.model.config, "function_to_apply") and function_to_apply is None:
function_to_apply = self.model.config.function_to_apply
else:
function_to_apply = ClassificationFunction.NONE
outputs = model_outputs["logits"][0]
outputs = outputs.numpy()
if function_to_apply == ClassificationFunction.SIGMOID:
scores = sigmoid(outputs)
elif function_to_apply == ClassificationFunction.SOFTMAX:
scores = softmax(outputs)
elif function_to_apply == ClassificationFunction.NONE:
scores = outputs
else:
raise ValueError(f"Unrecognized `function_to_apply` argument: {function_to_apply}")
if top_k == 1 and _legacy:
return {"label": self.model.config.id2label[scores.argmax().item()], "score": scores.max().item()}
dict_scores = [
{"label": self.model.config.id2label[i], "score": score.item()} for i, score in enumerate(scores)
]
if not _legacy:
dict_scores.sort(key=lambda x: x["score"], reverse=True)
if top_k is not None:
dict_scores = dict_scores[:top_k]
return dict_scores
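# Usage sketch (checkpoint from the class docstring above) showing the top_k
# behaviour implemented in postprocess and the text-pair input form accepted by
# preprocess.
from transformers import pipeline

classifier = pipeline(model="distilbert/distilbert-base-uncased-finetuned-sst-2-english")
print(classifier("This movie is disgustingly good !"))              # [{'label': 'POSITIVE', 'score': ...}]
print(classifier("This movie is disgustingly good !", top_k=None))  # one dict per label, sorted by score
print(classifier({"text": "I like you", "text_pair": "I love you"}))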
.\pipelines\text_generation.py
import enum
import warnings
from typing import Dict
from ..utils import add_end_docstrings, is_tf_available, is_torch_available
from .base import Pipeline, build_pipeline_init_args
if is_torch_available():
from ..models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING_NAMES
if is_tf_available():
import tensorflow as tf
from ..models.auto.modeling_tf_auto import TF_MODEL_FOR_CAUSAL_LM_MAPPING_NAMES
class ReturnType(enum.Enum):
TENSORS = 0
NEW_TEXT = 1
FULL_TEXT = 2
class Chat:
"""This class is intended to just be used internally in this pipeline and not exposed to users. We convert chats
to this format because the rest of the pipeline code tends to assume that lists of messages are
actually a batch of samples rather than messages in the same conversation."""
def __init__(self, messages: Dict):
for message in messages:
if not ("role" in message and "content" in message):
raise ValueError("When passing chat dicts as input, each dict must have a 'role' and 'content' key.")
self.messages = messages
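# Sketch (not part of the original file): the chat input format validated by the Chat
# class above; each message must carry "role" and "content" keys.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Write a haiku about pipelines."},
]
chat = Chat(messages)      # raises ValueError if a message lacks "role" or "content"
print(len(chat.messages))  # 2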
@add_end_docstrings(build_pipeline_init_args(has_tokenizer=True))
class TextGenerationPipeline(Pipeline):
"""
Language generation pipeline using any `ModelWithLMHead`. This pipeline predicts the words that will follow a
specified text prompt. It can also accept one or more chats. Each chat takes the form of a list of dicts,
where each dict contains "role" and "content" keys.
Example:
```
>>> from transformers import pipeline
>>> generator = pipeline(model="openai-community/gpt2")
>>> generator("I can't believe you did such a ", do_sample=False)
[{'generated_text': "I can't believe you did such a icky thing to me. I'm so sorry. I'm so sorry. I'm so sorry. I'm so sorry. I'm so sorry. I'm so sorry. I'm so sorry. I"}]
>>> # These parameters will return suggestions, and only the newly created text making it easier for prompting suggestions.
>>> outputs = generator("My tart needs some", num_return_sequences=4, return_full_text=False)
```
Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial). You can pass text
generation parameters to this pipeline to control stopping criteria, decoding strategy, and more. Learn more about
text generation parameters in [Text generation strategies](../generation_strategies) and [Text
generation](text_generation).
This language generation pipeline can currently be loaded from [`pipeline`] using the following task identifier:
`"text-generation"`.
The models that this pipeline can use are models that have been trained with an autoregressive language modeling
objective, which includes the uni-directional models in the library (e.g. openai-community/gpt2). See the list of available models
on [huggingface.co/models](https://huggingface.co/models?filter=text-generation).
"""
XL_PREFIX = """
In 1991, the remains of Russian Tsar Nicholas II and his family (except for Alexei and Maria) are discovered. The
voice of Nicholas's young son, Tsarevich Alexei Nikolaevich, narrates the remainder of the story. 1883 Western
Siberia, a young Grigori Rasputin is asked by his father and a group of men to perform magic. Rasputin has a vision
and denounces one of the men as a horse thief. Although his father initially slaps him for making such an
accusation, Rasputin watches as the man is chased outside and beaten. Twenty years later, Rasputin sees a vision of
the Virgin Mary, prompting him to become a priest. Rasputin quickly becomes famous, with people, even a bishop,
begging for his blessing. <eod> </s> <eos>
"""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.check_model_type(
TF_MODEL_FOR_CAUSAL_LM_MAPPING_NAMES if self.framework == "tf" else MODEL_FOR_CAUSAL_LM_MAPPING_NAMES
)
if "prefix" not in self._preprocess_params:
prefix = None
if self.model.config.prefix is not None:
prefix = self.model.config.prefix
if prefix is None and self.model.__class__.__name__ in [
"XLNetLMHeadModel",
"TransfoXLLMHeadModel",
"TFXLNetLMHeadModel",
"TFTransfoXLLMHeadModel",
]:
prefix = self.XL_PREFIX
if prefix is not None:
preprocess_params, forward_params, _ = self._sanitize_parameters(prefix=prefix, **self._forward_params)
self._preprocess_params = {**self._preprocess_params, **preprocess_params}
self._forward_params = {**self._forward_params, **forward_params}
def _sanitize_parameters(
self,
return_full_text=None,
return_tensors=None,
return_text=None,
return_type=None,
clean_up_tokenization_spaces=None,
prefix=None,
handle_long_generation=None,
stop_sequence=None,
add_special_tokens=False,
truncation=None,
padding=False,
max_length=None,
**generate_kwargs,
):
preprocess_params = {
"add_special_tokens": add_special_tokens,
"truncation": truncation,
"padding": padding,
"max_length": max_length,
}
if max_length is not None:
generate_kwargs["max_length"] = max_length
if prefix is not None:
preprocess_params["prefix"] = prefix
if prefix:
prefix_inputs = self.tokenizer(
prefix, padding=False, add_special_tokens=add_special_tokens, return_tensors=self.framework
)
generate_kwargs["prefix_length"] = prefix_inputs["input_ids"].shape[-1]
if handle_long_generation is not None:
if handle_long_generation not in {"hole"}:
raise ValueError(
f"{handle_long_generation} is not a valid value for `handle_long_generation` parameter expected"
" [None, 'hole']"
)
preprocess_params["handle_long_generation"] = handle_long_generation
preprocess_params.update(generate_kwargs)
forward_params = generate_kwargs
postprocess_params = {}
if return_full_text is not None and return_type is None:
if return_text is not None:
raise ValueError("`return_text` is mutually exclusive with `return_full_text`")
if return_tensors is not None:
raise ValueError("`return_full_text` is mutually exclusive with `return_tensors`")
return_type = ReturnType.FULL_TEXT if return_full_text else ReturnType.NEW_TEXT
if return_tensors is not None and return_type is None:
if return_text is not None:
raise ValueError("`return_text` is mutually exclusive with `return_tensors`")
return_type = ReturnType.TENSORS
if return_type is not None:
postprocess_params["return_type"] = return_type
if clean_up_tokenization_spaces is not None:
postprocess_params["clean_up_tokenization_spaces"] = clean_up_tokenization_spaces
if stop_sequence is not None:
stop_sequence_ids = self.tokenizer.encode(stop_sequence, add_special_tokens=False)
if len(stop_sequence_ids) > 1:
warnings.warn(
"Stopping on a multiple token sequence is not yet supported on transformers. The first token of"
" the stop sequence will be used as the stop sequence string in the interim."
)
generate_kwargs["eos_token_id"] = stop_sequence_ids[0]
return preprocess_params, forward_params, postprocess_params
def _parse_and_tokenize(self, *args, **kwargs):
"""
Parse arguments and tokenize
"""
if self.model.__class__.__name__ in ["TransfoXLLMHeadModel"]:
kwargs.update({"add_space_before_punct_symbol": True})
return super()._parse_and_tokenize(*args, **kwargs)
def preprocess(
self,
prompt_text,
prefix="",
handle_long_generation=None,
add_special_tokens=False,
truncation=None,
padding=False,
max_length=None,
**generate_kwargs,
):
if isinstance(prompt_text, Chat):
inputs = self.tokenizer.apply_chat_template(
prompt_text.messages,
truncation=truncation,
padding=padding,
max_length=max_length,
add_generation_prompt=True,
return_dict=True,
return_tensors=self.framework,
)
else:
inputs = self.tokenizer(
prefix + prompt_text,
truncation=truncation,
padding=padding,
max_length=max_length,
add_special_tokens=add_special_tokens,
return_tensors=self.framework,
)
inputs["prompt_text"] = prompt_text
if handle_long_generation == "hole":
cur_len = inputs["input_ids"].shape[-1]
if "max_new_tokens" in generate_kwargs:
new_tokens = generate_kwargs["max_new_tokens"]
else:
new_tokens = generate_kwargs.get("max_length", self.model.config.max_length) - cur_len
if new_tokens < 0:
raise ValueError("We cannot infer how many new tokens are expected")
if cur_len + new_tokens > self.tokenizer.model_max_length:
keep_length = self.tokenizer.model_max_length - new_tokens
if keep_length <= 0:
raise ValueError(
"We cannot use `hole` to handle this generation the number of desired tokens exceeds the"
" models max length"
)
inputs["input_ids"] = inputs["input_ids"][:, -keep_length:]
if "attention_mask" in inputs:
inputs["attention_mask"] = inputs["attention_mask"][:, -keep_length:]
return inputs
def _forward(self, model_inputs, **generate_kwargs):
input_ids = model_inputs["input_ids"]
attention_mask = model_inputs.get("attention_mask", None)
if input_ids.shape[1] == 0:
input_ids = None
attention_mask = None
in_b = 1
else:
in_b = input_ids.shape[0]
prompt_text = model_inputs.pop("prompt_text")
prefix_length = generate_kwargs.pop("prefix_length", 0)
if prefix_length > 0:
has_max_new_tokens = "max_new_tokens" in generate_kwargs or (
"generation_config" in generate_kwargs
and generate_kwargs["generation_config"].max_new_tokens is not None
)
if not has_max_new_tokens:
generate_kwargs["max_length"] = generate_kwargs.get("max_length") or self.model.config.max_length
generate_kwargs["max_length"] += prefix_length
has_min_new_tokens = "min_new_tokens" in generate_kwargs or (
"generation_config" in generate_kwargs
and generate_kwargs["generation_config"].min_new_tokens is not None
)
if not has_min_new_tokens and "min_length" in generate_kwargs:
generate_kwargs["min_length"] += prefix_length
generated_sequence = self.model.generate(input_ids=input_ids, attention_mask=attention_mask, **generate_kwargs)
out_b = generated_sequence.shape[0]
if self.framework == "pt":
generated_sequence = generated_sequence.reshape(in_b, out_b // in_b, *generated_sequence.shape[1:])
elif self.framework == "tf":
generated_sequence = tf.reshape(generated_sequence, (in_b, out_b // in_b, *generated_sequence.shape[1:]))
return {"generated_sequence": generated_sequence, "input_ids": input_ids, "prompt_text": prompt_text}
def postprocess(self, model_outputs, return_type=ReturnType.FULL_TEXT, clean_up_tokenization_spaces=True):
generated_sequence = model_outputs["generated_sequence"][0]
input_ids = model_outputs["input_ids"]
prompt_text = model_outputs["prompt_text"]
generated_sequence = generated_sequence.numpy().tolist()
records = []
for sequence in generated_sequence:
if return_type == ReturnType.TENSORS:
record = {"generated_token_ids": sequence}
elif return_type in {ReturnType.NEW_TEXT, ReturnType.FULL_TEXT}:
text = self.tokenizer.decode(
sequence,
skip_special_tokens=True,
clean_up_tokenization_spaces=clean_up_tokenization_spaces,
)
if input_ids is None:
prompt_length = 0
else:
prompt_length = len(
self.tokenizer.decode(
input_ids[0],
skip_special_tokens=True,
clean_up_tokenization_spaces=clean_up_tokenization_spaces,
)
)
all_text = text[prompt_length:]
if return_type == ReturnType.FULL_TEXT:
if isinstance(prompt_text, str):
all_text = prompt_text + all_text
elif isinstance(prompt_text, Chat):
all_text = prompt_text.messages + [{"role": "assistant", "content": all_text}]
record = {"generated_text": all_text}
records.append(record)
return records
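The parameter handling above is easiest to see from the caller's side. Below is a minimal usage sketch (not part of the source): it assumes the `gpt2` checkpoint is available and simply exercises `return_full_text`, `max_new_tokens`, and `stop_sequence` as they are handled by `_sanitize_parameters`.
```
from transformers import pipeline

generator = pipeline("text-generation", model="gpt2")

# return_full_text=False maps to ReturnType.NEW_TEXT, so only the continuation is returned
outputs = generator("Hello, I'm a language model,", max_new_tokens=20, return_full_text=False)
print(outputs[0]["generated_text"])

# stop_sequence is encoded and its first token becomes eos_token_id
# (multi-token stop sequences trigger the warning emitted in _sanitize_parameters)
outputs = generator("One, two, three", max_new_tokens=30, stop_sequence=",")
print(outputs[0]["generated_text"])
```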
.\pipelines\text_to_audio.py
from typing import List, Union
from ..utils import is_torch_available
from .base import Pipeline
if is_torch_available():
from ..models.auto.modeling_auto import MODEL_FOR_TEXT_TO_SPECTROGRAM_MAPPING
from ..models.speecht5.modeling_speecht5 import SpeechT5HifiGan
DEFAULT_VOCODER_ID = "microsoft/speecht5_hifigan"
class TextToAudioPipeline(Pipeline):
"""
Text-to-audio generation pipeline using any `AutoModelForTextToWaveform` or `AutoModelForTextToSpectrogram` model.
This pipeline generates an audio file from an input text and, optionally, other conditional inputs.
Example:
```
>>> from transformers import pipeline
>>> pipe = pipeline(model="suno/bark-small")
>>> output = pipe("Hey it's HuggingFace on the phone!")
>>> audio = output["audio"]
>>> sampling_rate = output["sampling_rate"]
```
Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial)
<Tip>
You can specify parameters passed to the model by using [`TextToAudioPipeline.__call__.forward_params`] or [`TextToAudioPipeline.__call__.generate_kwargs`].
Example:
```
>>> from transformers import pipeline
>>> music_generator = pipeline(task="text-to-audio", model="facebook/musicgen-small", framework="pt")
>>> # diversify the music generation by adding randomness with a high temperature, and set a maximum music length
>>> generate_kwargs = {
... "do_sample": True,
... "temperature": 0.7,
... "max_new_tokens": 35,
... }
>>> outputs = music_generator("Techno music with high melodic riffs", generate_kwargs=generate_kwargs)
```
</Tip>
This pipeline can currently be loaded from [`pipeline`] using the following task identifiers: `"text-to-speech"` or `"text-to-audio"`.
See the list of available models on [huggingface.co/models](https://huggingface.co/models?filter=text-to-speech).
"""
def __init__(self, *args, vocoder=None, sampling_rate=None, **kwargs):
super().__init__(*args, **kwargs)
if self.framework == "tf":
raise ValueError("The TextToAudioPipeline is only available in PyTorch.")
self.vocoder = None
if self.model.__class__ in MODEL_FOR_TEXT_TO_SPECTROGRAM_MAPPING.values():
self.vocoder = (
SpeechT5HifiGan.from_pretrained(DEFAULT_VOCODER_ID).to(self.model.device)
if vocoder is None
else vocoder
)
self.sampling_rate = sampling_rate
if self.vocoder is not None:
self.sampling_rate = self.vocoder.config.sampling_rate
if self.sampling_rate is None:
config = self.model.config
gen_config = self.model.__dict__.get("generation_config", None)
if gen_config is not None:
config.update(gen_config.to_dict())
for sampling_rate_name in ["sample_rate", "sampling_rate"]:
sampling_rate = getattr(config, sampling_rate_name, None)
if sampling_rate is not None:
self.sampling_rate = sampling_rate
def preprocess(self, text, **kwargs):
if isinstance(text, str):
text = [text]
if self.model.config.model_type == "bark":
new_kwargs = {
"max_length": self.model.generation_config.semantic_config.get("max_input_semantic_length", 256),
"add_special_tokens": False,
"return_attention_mask": True,
"return_token_type_ids": False,
"padding": "max_length",
}
new_kwargs.update(kwargs)
kwargs = new_kwargs
output = self.tokenizer(text, **kwargs, return_tensors="pt")
return output
def _forward(self, model_inputs, **kwargs):
kwargs = self._ensure_tensor_on_device(kwargs, device=self.device)
forward_params = kwargs["forward_params"]
generate_kwargs = kwargs["generate_kwargs"]
if self.model.can_generate():
generate_kwargs = self._ensure_tensor_on_device(generate_kwargs, device=self.device)
forward_params.update(generate_kwargs)
output = self.model.generate(**model_inputs, **forward_params)
else:
if len(generate_kwargs):
raise ValueError(
f"""You're using the `TextToAudioPipeline` with a forward-only model, but `generate_kwargs` is non empty.
For forward-only TTA models, please use `forward_params` instead of
`generate_kwargs`. For reference, here are the `generate_kwargs` used here:
{generate_kwargs.keys()}"""
)
output = self.model(**model_inputs, **forward_params)[0]
if self.vocoder is not None:
output = self.vocoder(output)
return output
def __call__(self, text_inputs: Union[str, List[str]], **forward_params):
"""
Generate speech/audio from the input text(s). See the [`TextToAudioPipeline`] documentation for more information.
Args:
text_inputs (`str` or `List[str]`):
The text(s) to generate audio from.
forward_params (`dict`, *optional*):
Parameters passed to the model's generation/forward method. `forward_params` are always passed to the underlying model.
Return:
A `dict` or a list of `dict`: The returned dictionaries have two key/value pairs:
- **audio** (`np.ndarray` of shape `(nb_channels, audio_length)`) -- The generated audio waveform.
- **sampling_rate** (`int`) -- The sampling rate of the generated audio waveform.
"""
return super().__call__(text_inputs, **forward_params)
def _sanitize_parameters(
self,
preprocess_params=None,
forward_params=None,
generate_kwargs=None,
):
params = {
"forward_params": forward_params if forward_params else {},
"generate_kwargs": generate_kwargs if generate_kwargs else {},
}
if preprocess_params is None:
preprocess_params = {}
postprocess_params = {}
return preprocess_params, params, postprocess_params
def postprocess(self, waveform):
output_dict = {}
output_dict["audio"] = waveform.cpu().float().numpy()
output_dict["sampling_rate"] = self.sampling_rate
return output_dict
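As a complement to the docstring examples above, here is a minimal usage sketch (not from the source) showing how `generate_kwargs` flow through `_sanitize_parameters` into `model.generate`; it assumes the `suno/bark-small` checkpoint is available.
```
from transformers import pipeline

speech = pipeline("text-to-speech", model="suno/bark-small")
out = speech(
    "Hello, this is a test.",
    generate_kwargs={"do_sample": True, "temperature": 0.7},
)
# postprocess returns a dict with the waveform and its sampling rate
audio, sr = out["audio"], out["sampling_rate"]
print(audio.shape, sr)
```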
.\pipelines\token_classification.py
import types
import warnings
from typing import List, Optional, Tuple, Union
import numpy as np
from ..models.bert.tokenization_bert import BasicTokenizer
from ..utils import (
ExplicitEnum,
add_end_docstrings,
is_tf_available,
is_torch_available,
)
from .base import ArgumentHandler, ChunkPipeline, Dataset, build_pipeline_init_args
if is_tf_available():
import tensorflow as tf
from ..models.auto.modeling_tf_auto import TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES
if is_torch_available():
from ..models.auto.modeling_auto import MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES
class TokenClassificationArgumentHandler(ArgumentHandler):
"""
Handles arguments for token classification.
"""
def __call__(self, inputs: Union[str, List[str]], **kwargs):
if inputs is not None and isinstance(inputs, (list, tuple)) and len(inputs) > 0:
inputs = list(inputs)
batch_size = len(inputs)
elif isinstance(inputs, str):
inputs = [inputs]
batch_size = 1
elif Dataset is not None and isinstance(inputs, Dataset) or isinstance(inputs, types.GeneratorType):
return inputs, None
else:
raise ValueError("At least one input is required.")
offset_mapping = kwargs.get("offset_mapping")
if offset_mapping:
if isinstance(offset_mapping, list) and isinstance(offset_mapping[0], tuple):
offset_mapping = [offset_mapping]
if len(offset_mapping) != batch_size:
raise ValueError("offset_mapping should have the same batch size as the input")
return inputs, offset_mapping
class AggregationStrategy(ExplicitEnum):
"""All the valid aggregation strategies for TokenClassificationPipeline"""
NONE = "none"
SIMPLE = "simple"
FIRST = "first"
AVERAGE = "average"
MAX = "max"
@add_end_docstrings(
build_pipeline_init_args(has_tokenizer=True),
r"""
ignore_labels (`List[str]`, defaults to `["O"]`):
A list of labels to ignore.
grouped_entities (`bool`, *optional*, defaults to `False`):
DEPRECATED, use `aggregation_strategy` instead. Whether or not to group the tokens corresponding to the
same entity together in the predictions or not.
stride (`int`, *optional*):
If stride is provided, the pipeline is applied on all the text. The text is split into chunks of size
model_max_length. Works only with fast tokenizers and `aggregation_strategy` different from `NONE`. The
value of this argument defines the number of overlapping tokens between chunks. In other words, the model
will shift forward by `tokenizer.model_max_length - stride` tokens each step.
aggregation_strategy (`str`, *optional*, defaults to `"none"`):
The strategy to fuse (or not) tokens based on the model prediction.
- "none" : Will simply not do any aggregation and simply return raw results from the model
- "simple" : Will attempt to group entities following the default schema. (A, B-TAG), (B, I-TAG), (C,
I-TAG), (D, B-TAG2) (E, B-TAG2) will end up being [{"word": ABC, "entity": "TAG"}, {"word": "D",
"entity": "TAG2"}, {"word": "E", "entity": "TAG2"}] Notice that two consecutive B tags will end up as
different entities. On word based languages, we might end up splitting words undesirably : Imagine
Microsoft being tagged as [{"word": "Micro", "entity": "ENTERPRISE"}, {"word": "soft", "entity":
"NAME"}]. Look for FIRST, MAX, AVERAGE for ways to mitigate that and disambiguate words (on languages
that support that meaning, which is basically tokens separated by a space). These mitigations will
only work on real words, "New york" might still be tagged with two different entities.
- "first" : (works only on word based models) Will use the `SIMPLE` strategy except that words, cannot
end up with different tags. Words will simply use the tag of the first token of the word when there
is ambiguity.
- "average" : (works only on word based models) Will use the `SIMPLE` strategy except that words,
cannot end up with different tags. scores will be averaged first across tokens, and then the maximum
label is applied.
- "max" : (works only on word based models) Will use the `SIMPLE` strategy except that words, cannot
end up with different tags. Word entity will simply be the token with the maximum score.
"""
)
class TokenClassificationPipeline(ChunkPipeline):
"""
Named Entity Recognition pipeline using any `ModelForTokenClassification`. See the [named entity recognition
examples](../task_summary#named-entity-recognition) for more information.
Example:
```
>>> from transformers import pipeline
>>> token_classifier = pipeline(model="Jean-Baptiste/camembert-ner", aggregation_strategy="simple")
>>> sentence = "Je m'appelle jean-baptiste et je vis à montréal"
>>> tokens = token_classifier(sentence)
>>> tokens
[{'entity_group': 'PER', 'score': 0.9931, 'word': 'jean-baptiste', 'start': 12, 'end': 26}, {'entity_group': 'LOC', 'score': 0.998, 'word': 'montréal', 'start': 38, 'end': 47}]
>>> token = tokens[0]
>>> # Start and end provide an easy way to highlight words in the original text.
>>> sentence[token["start"] : token["end"]]
' jean-baptiste'
>>> # Some models use the same idea to do part of speech.
>>> syntaxer = pipeline(model="vblagoje/bert-english-uncased-finetuned-pos", aggregation_strategy="simple")
>>> syntaxer("My name is Sarah and I live in London")
[{'entity_group': 'PRON', 'score': 0.999, 'word': 'my', 'start': 0, 'end': 2}, {'entity_group': 'NOUN', 'score': 0.997, 'word': 'name', 'start': 3, 'end': 7}, {'entity_group': 'AUX', 'score': 0.994, 'word': 'is', 'start': 8, 'end': 10}, {'entity_group': 'PROPN', 'score': 0.999, 'word': 'sarah', 'start': 11, 'end': 16}, {'entity_group': 'CCONJ', 'score': 0.999, 'word': 'and', 'start': 17, 'end': 20}, {'entity_group': 'PRON', 'score': 0.999, 'word': 'i', 'start': 21, 'end': 22}, {'entity_group': 'VERB', 'score': 0.998, 'word': 'live', 'start': 23, 'end': 27}, {'entity_group': 'ADP', 'score': 0.999, 'word': 'in', 'start': 28, 'end': 30}, {'entity_group': 'PROPN', 'score': 0.999, 'word': 'london', 'start': 31, 'end': 37}]
```
Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial)
This token recognition pipeline can currently be loaded from [`pipeline`] using the following task identifier:
`"ner"` (for predicting the classes of tokens in a sequence: person, organisation, location or miscellaneous).
The models that this pipeline can use are models that have been fine-tuned on a token classification task. See the
up-to-date list of available models on
[huggingface.co/models](https://huggingface.co/models?filter=token-classification).
"""
default_input_names = "sequences"
def __init__(self, args_parser=TokenClassificationArgumentHandler(), *args, **kwargs):
super().__init__(*args, **kwargs)
self.check_model_type(
TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES
if self.framework == "tf"
else MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES
)
self._basic_tokenizer = BasicTokenizer(do_lower_case=False)
self._args_parser = args_parser
def _sanitize_parameters(
self,
ignore_labels=None,
grouped_entities: Optional[bool] = None,
ignore_subwords: Optional[bool] = None,
aggregation_strategy: Optional[AggregationStrategy] = None,
offset_mapping: Optional[List[Tuple[int, int]]] = None,
stride: Optional[int] = None,
):
"""
实现 `__call__` 方法,用于对给定的文本输入进行令牌分类。
Args:
inputs (`str` or `List[str]`):
一个或多个文本(或文本列表)用于令牌分类。
Return:
A list or a list of list of `dict`: 每个结果都作为一个字典列表返回(每个输入的每个令牌,或者如果此管道是
使用了聚合策略实例化,则每个实体都对应一个字典)具有以下键:
- **word** (`str`) -- 被分类的令牌/单词。这是通过解码所选令牌获得的。如果要获得原始句子中的确切字符串,请使用 `start` 和 `end`。
- **score** (`float`) -- `entity` 的相应概率。
- **entity** (`str`) -- 预测的令牌/单词的实体(当 *aggregation_strategy* 不是 `"none"` 时命名为 *entity_group*)。
- **index** (`int`, 仅在 `aggregation_strategy="none"` 时存在) -- 句子中对应令牌的索引。
- **start** (`int`, *可选*) -- 句子中对应实体的起始索引。仅在 tokenizer 中可用偏移时存在。
- **end** (`int`, *可选*) -- 句子中对应实体的结束索引。仅在 tokenizer 中可用偏移时存在。
"""
_inputs, offset_mapping = self._args_parser(inputs, **kwargs)
if offset_mapping:
kwargs["offset_mapping"] = offset_mapping
return super().__call__(inputs, **kwargs)
def preprocess(self, sentence, offset_mapping=None, **preprocess_params):
tokenizer_params = preprocess_params.pop("tokenizer_params", {})
truncation = True if self.tokenizer.model_max_length and self.tokenizer.model_max_length > 0 else False
inputs = self.tokenizer(
sentence,
return_tensors=self.framework,
truncation=truncation,
return_special_tokens_mask=True,
return_offsets_mapping=self.tokenizer.is_fast,
**tokenizer_params,
)
inputs.pop("overflow_to_sample_mapping", None)
num_chunks = len(inputs["input_ids"])
for i in range(num_chunks):
if self.framework == "tf":
model_inputs = {k: tf.expand_dims(v[i], 0) for k, v in inputs.items()}
else:
model_inputs = {k: v[i].unsqueeze(0) for k, v in inputs.items()}
if offset_mapping is not None:
model_inputs["offset_mapping"] = offset_mapping
model_inputs["sentence"] = sentence if i == 0 else None
model_inputs["is_last"] = i == num_chunks - 1
yield model_inputs
def _forward(self, model_inputs):
special_tokens_mask = model_inputs.pop("special_tokens_mask")
offset_mapping = model_inputs.pop("offset_mapping", None)
sentence = model_inputs.pop("sentence")
is_last = model_inputs.pop("is_last")
if self.framework == "tf":
logits = self.model(**model_inputs)[0]
else:
output = self.model(**model_inputs)
logits = output["logits"] if isinstance(output, dict) else output[0]
return {
"logits": logits,
"special_tokens_mask": special_tokens_mask,
"offset_mapping": offset_mapping,
"sentence": sentence,
"is_last": is_last,
**model_inputs,
}
def postprocess(self, all_outputs, aggregation_strategy=AggregationStrategy.NONE, ignore_labels=None):
if ignore_labels is None:
ignore_labels = ["O"]
all_entities = []
for model_outputs in all_outputs:
logits = model_outputs["logits"][0].numpy()
sentence = all_outputs[0]["sentence"]
input_ids = model_outputs["input_ids"][0]
offset_mapping = (
model_outputs["offset_mapping"][0] if model_outputs["offset_mapping"] is not None else None
)
special_tokens_mask = model_outputs["special_tokens_mask"][0].numpy()
maxes = np.max(logits, axis=-1, keepdims=True)
shifted_exp = np.exp(logits - maxes)
scores = shifted_exp / shifted_exp.sum(axis=-1, keepdims=True)
if self.framework == "tf":
input_ids = input_ids.numpy()
offset_mapping = offset_mapping.numpy() if offset_mapping is not None else None
pre_entities = self.gather_pre_entities(
sentence, input_ids, scores, offset_mapping, special_tokens_mask, aggregation_strategy
)
grouped_entities = self.aggregate(pre_entities, aggregation_strategy)
entities = [
entity
for entity in grouped_entities
if entity.get("entity", None) not in ignore_labels
and entity.get("entity_group", None) not in ignore_labels
]
all_entities.extend(entities)
num_chunks = len(all_outputs)
if num_chunks > 1:
all_entities = self.aggregate_overlapping_entities(all_entities)
return all_entities
def aggregate_overlapping_entities(self, entities):
if len(entities) == 0:
return entities
entities = sorted(entities, key=lambda x: x["start"])
aggregated_entities = []
previous_entity = entities[0]
for entity in entities:
if previous_entity["start"] <= entity["start"] < previous_entity["end"]:
current_length = entity["end"] - entity["start"]
previous_length = previous_entity["end"] - previous_entity["start"]
if current_length > previous_length:
previous_entity = entity
elif current_length == previous_length and entity["score"] > previous_entity["score"]:
previous_entity = entity
else:
aggregated_entities.append(previous_entity)
previous_entity = entity
aggregated_entities.append(previous_entity)
return aggregated_entities
def gather_pre_entities(
self,
sentence: str,
input_ids: np.ndarray,
scores: np.ndarray,
offset_mapping: Optional[List[Tuple[int, int]]],
special_tokens_mask: np.ndarray,
aggregation_strategy: AggregationStrategy,
) -> List[dict]:
"""Fuse various numpy arrays into dicts with all the information needed for aggregation"""
pre_entities = []
for idx, token_scores in enumerate(scores):
if special_tokens_mask[idx]:
continue
word = self.tokenizer.convert_ids_to_tokens(int(input_ids[idx]))
if offset_mapping is not None:
start_ind, end_ind = offset_mapping[idx]
if not isinstance(start_ind, int):
if self.framework == "pt":
start_ind = start_ind.item()
end_ind = end_ind.item()
word_ref = sentence[start_ind:end_ind]
if getattr(self.tokenizer, "_tokenizer", None) and getattr(
self.tokenizer._tokenizer.model, "continuing_subword_prefix", None
):
is_subword = len(word) != len(word_ref)
else:
if aggregation_strategy in {
AggregationStrategy.FIRST,
AggregationStrategy.AVERAGE,
AggregationStrategy.MAX,
}:
warnings.warn(
"Tokenizer does not support real words, using fallback heuristic",
UserWarning,
)
is_subword = start_ind > 0 and " " not in sentence[start_ind - 1 : start_ind + 1]
if int(input_ids[idx]) == self.tokenizer.unk_token_id:
word = word_ref
is_subword = False
else:
start_ind = None
end_ind = None
is_subword = False
pre_entity = {
"word": word,
"scores": token_scores,
"start": start_ind,
"end": end_ind,
"index": idx,
"is_subword": is_subword,
}
pre_entities.append(pre_entity)
return pre_entities
def aggregate(self, pre_entities: List[dict], aggregation_strategy: AggregationStrategy) -> List[dict]:
if aggregation_strategy in {AggregationStrategy.NONE, AggregationStrategy.SIMPLE}:
entities = []
for pre_entity in pre_entities:
entity_idx = pre_entity["scores"].argmax()
score = pre_entity["scores"][entity_idx]
entity = {
"entity": self.model.config.id2label[entity_idx],
"score": score,
"index": pre_entity["index"],
"word": pre_entity["word"],
"start": pre_entity["start"],
"end": pre_entity["end"],
}
entities.append(entity)
else:
entities = self.aggregate_words(pre_entities, aggregation_strategy)
if aggregation_strategy == AggregationStrategy.NONE:
return entities
return self.group_entities(entities)
def aggregate_word(self, entities: List[dict], aggregation_strategy: AggregationStrategy) -> dict:
word = self.tokenizer.convert_tokens_to_string([entity["word"] for entity in entities])
if aggregation_strategy == AggregationStrategy.FIRST:
scores = entities[0]["scores"]
idx = scores.argmax()
score = scores[idx]
entity = self.model.config.id2label[idx]
elif aggregation_strategy == AggregationStrategy.MAX:
max_entity = max(entities, key=lambda entity: entity["scores"].max())
scores = max_entity["scores"]
idx = scores.argmax()
score = scores[idx]
entity = self.model.config.id2label[idx]
elif aggregation_strategy == AggregationStrategy.AVERAGE:
scores = np.stack([entity["scores"] for entity in entities])
average_scores = np.nanmean(scores, axis=0)
entity_idx = average_scores.argmax()
entity = self.model.config.id2label[entity_idx]
score = average_scores[entity_idx]
else:
raise ValueError("Invalid aggregation_strategy")
new_entity = {
"entity": entity,
"score": score,
"word": word,
"start": entities[0]["start"],
"end": entities[-1]["end"],
}
return new_entity
def aggregate_words(self, entities: List[dict], aggregation_strategy: AggregationStrategy) -> List[dict]:
"""
Override tokens from a given word that disagree to force agreement on word boundaries.
Example: micro|soft| com|pany| B-ENT I-NAME I-ENT I-ENT will be rewritten with first strategy as microsoft|
company| B-ENT I-ENT
"""
if aggregation_strategy in {
AggregationStrategy.NONE,
AggregationStrategy.SIMPLE,
}:
raise ValueError("NONE and SIMPLE strategies are invalid for word aggregation")
word_entities = []
word_group = None
for entity in entities:
if word_group is None:
word_group = [entity]
elif entity["is_subword"]:
word_group.append(entity)
else:
word_entities.append(self.aggregate_word(word_group, aggregation_strategy))
word_group = [entity]
if word_group is not None:
word_entities.append(self.aggregate_word(word_group, aggregation_strategy))
return word_entities
def group_sub_entities(self, entities: List[dict]) -> dict:
"""
Group together the adjacent tokens with the same entity predicted.
Args:
entities (`dict`): The entities predicted by the pipeline.
"""
entity = entities[0]["entity"].split("-", 1)[-1]
scores = np.nanmean([entity["score"] for entity in entities])
tokens = [entity["word"] for entity in entities]
entity_group = {
"entity_group": entity,
"score": np.mean(scores),
"word": self.tokenizer.convert_tokens_to_string(tokens),
"start": entities[0]["start"],
"end": entities[-1]["end"],
}
return entity_group
def get_tag(self, entity_name: str) -> Tuple[str, str]:
if entity_name.startswith("B-"):
bi = "B"
tag = entity_name[2:]
elif entity_name.startswith("I-"):
bi = "I"
tag = entity_name[2:]
else:
bi = "I"
tag = entity_name
return bi, tag
def group_entities(self, entities: List[dict]) -> List[dict]:
"""
Find and group together the adjacent tokens with the same entity predicted.
Args:
entities (`dict`): The entities predicted by the pipeline.
"""
entity_groups = []
entity_group_disagg = []
for entity in entities:
if not entity_group_disagg:
entity_group_disagg.append(entity)
continue
bi, tag = self.get_tag(entity["entity"])
last_bi, last_tag = self.get_tag(entity_group_disagg[-1]["entity"])
if tag == last_tag and bi != "B":
entity_group_disagg.append(entity)
else:
entity_groups.append(self.group_sub_entities(entity_group_disagg))
entity_group_disagg = [entity]
if entity_group_disagg:
entity_groups.append(self.group_sub_entities(entity_group_disagg))
return entity_groups
NerPipeline = TokenClassificationPipeline
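To make the aggregation logic above concrete, here is a small sketch (not part of the source) contrasting raw token-level output with grouped entities; it assumes the `dslim/bert-base-NER` checkpoint is available, but any token-classification model works.
```
from transformers import pipeline

model_id = "dslim/bert-base-NER"  # assumed checkpoint, used for illustration only
raw = pipeline("ner", model=model_id)                                  # aggregation_strategy defaults to "none"
grouped = pipeline("ner", model=model_id, aggregation_strategy="simple")

text = "Hugging Face is based in New York City"
print(raw(text))      # one dict per token, with "entity" and "index"
print(grouped(text))  # adjacent tokens fused via group_entities, with "entity_group" and the full "word"
```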
.\pipelines\video_classification.py
from io import BytesIO
from typing import List, Union
import requests
from ..utils import add_end_docstrings, is_decord_available, is_torch_available, logging, requires_backends
from .base import Pipeline, build_pipeline_init_args
if is_decord_available():
import numpy as np
from decord import VideoReader
if is_torch_available():
from ..models.auto.modeling_auto import MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING_NAMES
logger = logging.get_logger(__name__)
@add_end_docstrings(build_pipeline_init_args(has_image_processor=True))
class VideoClassificationPipeline(Pipeline):
"""
Video classification pipeline using any `AutoModelForVideoClassification`. This pipeline predicts the class of a
video.
This video classification pipeline can currently be loaded from [`pipeline`] using the following task identifier:
`"video-classification"`.
See the list of available models on
[huggingface.co/models](https://huggingface.co/models?filter=video-classification).
"""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
requires_backends(self, "decord")
self.check_model_type(MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING_NAMES)
def _sanitize_parameters(self, top_k=None, num_frames=None, frame_sampling_rate=None):
preprocess_params = {}
if frame_sampling_rate is not None:
preprocess_params["frame_sampling_rate"] = frame_sampling_rate
if num_frames is not None:
preprocess_params["num_frames"] = num_frames
postprocess_params = {}
if top_k is not None:
postprocess_params["top_k"] = top_k
return preprocess_params, {}, postprocess_params
def __call__(self, videos: Union[str, List[str]], **kwargs):
"""
Assign labels to the video(s) passed as inputs.
Args:
videos (`str`, `List[str]`):
The pipeline handles two types of videos:
- A string containing an HTTP link pointing to a video
- A string containing a local path to a video
The pipeline accepts either a single video or a batch of videos, which must then be passed as strings.
Videos in a batch must all be in the same format: all as HTTP links or all as local paths.
top_k (`int`, *optional*, defaults to 5):
The number of top labels returned by the pipeline. If the provided number is higher than the number of labels available in the model configuration, it defaults to the number of labels.
num_frames (`int`, *optional*, defaults to `self.model.config.num_frames`):
The number of frames sampled from the video. If not provided, defaults to the number of frames specified in the model configuration.
frame_sampling_rate (`int`, *optional*, defaults to 1):
The sampling rate used to select frames from the video. If not provided, defaults to 1, i.e. every frame is used.
Return:
A dictionary or a list of dictionaries containing the results: one dictionary for a single video input, or a corresponding list of dictionaries for multiple videos.
The dictionaries contain the following keys:
- **label** (`str`) -- The label identified by the model.
- **score** (`int`) -- The score attributed by the model to that label.
"""
return super().__call__(videos, **kwargs)
def preprocess(self, video, num_frames=None, frame_sampling_rate=1):
"""
Preprocess a video into model inputs.
Args:
video (`str` or `BytesIO`):
Path/URL of the video, or a BytesIO object.
num_frames (`int`, *optional*):
Number of frames sampled from the video. Defaults to `self.model.config.num_frames` if not provided.
frame_sampling_rate (`int`, *optional*, defaults to 1):
Sampling rate used to select frames from the video.
Returns:
A dictionary of model inputs.
Raises:
ValueError: If the video format is unsupported or cannot be read.
"""
if num_frames is None:
num_frames = self.model.config.num_frames
if video.startswith("http://") or video.startswith("https://"):
video = BytesIO(requests.get(video).content)
videoreader = VideoReader(video)
videoreader.seek(0)
start_idx = 0
end_idx = num_frames * frame_sampling_rate - 1
indices = np.linspace(start_idx, end_idx, num=num_frames, dtype=np.int64)
video = videoreader.get_batch(indices).asnumpy()
video = list(video)
model_inputs = self.image_processor(video, return_tensors=self.framework)
return model_inputs
def _forward(self, model_inputs):
"""
Pass the model inputs to the model and retrieve the model outputs.
Args:
model_inputs:
A dictionary of inputs for the model.
Returns:
The outputs of the model.
"""
model_outputs = self.model(**model_inputs)
return model_outputs
def postprocess(self, model_outputs, top_k=5):
if top_k > self.model.config.num_labels:
top_k = self.model.config.num_labels
if self.framework == "pt":
probs = model_outputs.logits.softmax(-1)[0]
scores, ids = probs.topk(top_k)
else:
raise ValueError(f"Unsupported framework: {self.framework}")
scores = scores.tolist()
ids = ids.tolist()
return [{"score": score, "label": self.model.config.id2label[_id]} for score, _id in zip(scores, ids)]
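A brief usage sketch (not part of the source): it assumes `decord` is installed and the `MCG-NJU/videomae-base-finetuned-kinetics` checkpoint is available; the video URL is a placeholder, substitute any reachable `.mp4` file or local path.
```
from transformers import pipeline

video_cls = pipeline("video-classification", model="MCG-NJU/videomae-base-finetuned-kinetics")
results = video_cls(
    "https://example.com/archery.mp4",  # placeholder URL for illustration
    top_k=3,
    frame_sampling_rate=4,  # sample every 4th frame, as handled in preprocess
)
print(results)  # [{"score": ..., "label": ...}, ...]
```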
.\pipelines\visual_question_answering.py
from typing import Union
from ..utils import add_end_docstrings, is_torch_available, is_vision_available, logging
from .base import Pipeline, build_pipeline_init_args
if is_vision_available():
from PIL import Image
from ..image_utils import load_image
if is_torch_available():
from ..models.auto.modeling_auto import MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING_NAMES
logger = logging.get_logger(__name__)
@add_end_docstrings(build_pipeline_init_args(has_tokenizer=True, has_image_processor=True))
class VisualQuestionAnsweringPipeline(Pipeline):
"""
Visual Question Answering pipeline using a `AutoModelForVisualQuestionAnswering`. This pipeline is currently only
available in PyTorch.
Example:
```
>>> from transformers import pipeline
>>> oracle = pipeline(model="dandelin/vilt-b32-finetuned-vqa")
>>> image_url = "https://huggingface.co/datasets/Narsil/image_dummy/raw/main/lena.png"
>>> oracle(question="What is she wearing ?", image=image_url)
[{'score': 0.948, 'answer': 'hat'}, {'score': 0.009, 'answer': 'fedora'}, {'score': 0.003, 'answer': 'clothes'}, {'score': 0.003, 'answer': 'sun hat'}, {'score': 0.002, 'answer': 'nothing'}]
>>> oracle(question="What is she wearing ?", image=image_url, top_k=1)
[{'score': 0.948, 'answer': 'hat'}]
>>> oracle(question="Is this a person ?", image=image_url, top_k=1)
[{'score': 0.993, 'answer': 'yes'}]
>>> oracle(question="Is this a man ?", image=image_url, top_k=1)
[{'score': 0.996, 'answer': 'no'}]
```
Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial)
This visual question answering pipeline can currently be loaded from [`pipeline`] using the following task
identifiers: `"visual-question-answering", "vqa"`.
The models that this pipeline can use are models that have been fine-tuned on a visual question answering task. See
the up-to-date list of available models on
[huggingface.co/models](https://huggingface.co/models?filter=visual-question-answering).
"""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.check_model_type(MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING_NAMES)
def _sanitize_parameters(self, top_k=None, padding=None, truncation=None, timeout=None, **kwargs):
preprocess_params, postprocess_params = {}, {}
if padding is not None:
preprocess_params["padding"] = padding
if truncation is not None:
preprocess_params["truncation"] = truncation
if timeout is not None:
preprocess_params["timeout"] = timeout
if top_k is not None:
postprocess_params["top_k"] = top_k
return preprocess_params, {}, postprocess_params
def __call__(self, image: Union["Image.Image", str], question: str = None, **kwargs):
r"""
Answers open-ended questions about images. The pipeline accepts several types of inputs which are detailed
below:
- `pipeline(image=image, question=question)`
- `pipeline({"image": image, "question": question})`
- `pipeline([{"image": image, "question": question}])`
- `pipeline([{"image": image, "question": question}, {"image": image, "question": question}])`
Args:
image (`str`, `List[str]`, `PIL.Image` or `List[PIL.Image]`):
The pipeline handles three types of images:
- A string containing a http link pointing to an image
- A string containing a local path to an image
- An image loaded in PIL directly
The pipeline accepts either a single image or a batch of images. If given a single image, it can be
broadcasted to multiple questions.
question (`str`, `List[str]`):
The question(s) asked. If given a single question, it can be broadcasted to multiple images.
top_k (`int`, *optional*, defaults to 5):
The number of top labels that will be returned by the pipeline. If the provided number is higher than
the number of labels available in the model configuration, it will default to the number of labels.
timeout (`float`, *optional*, defaults to None):
The maximum time in seconds to wait for fetching images from the web. If None, no timeout is set and
the call may block forever.
Return:
A dictionary or a list of dictionaries containing the result. The dictionaries contain the following keys:
- **label** (`str`) -- The label identified by the model.
- **score** (`int`) -- The score attributed by the model for that label.
"""
if isinstance(image, (Image.Image, str)) and isinstance(question, str):
inputs = {"image": image, "question": question}
else:
"""
If the input does not match the case above, the following formats are supported:
- {"image": image, "question": question}
- [{"image": image, "question": question}]
- Generators and datasets
"""
inputs = image
results = super().__call__(inputs, **kwargs)
return results
def preprocess(self, inputs, padding=False, truncation=False, timeout=None):
image = load_image(inputs["image"], timeout=timeout)
model_inputs = self.tokenizer(
inputs["question"], return_tensors=self.framework, padding=padding, truncation=truncation
)
image_features = self.image_processor(images=image, return_tensors=self.framework)
model_inputs.update(image_features)
return model_inputs
def _forward(self, model_inputs, **generate_kwargs):
if self.model.can_generate():
model_outputs = self.model.generate(**model_inputs, **generate_kwargs)
else:
model_outputs = self.model(**model_inputs)
return model_outputs
def postprocess(self, model_outputs, top_k=5):
if self.model.can_generate():
return [
{"answer": self.tokenizer.decode(output_ids, skip_special_tokens=True).strip()}
for output_ids in model_outputs
]
else:
if top_k > self.model.config.num_labels:
top_k = self.model.config.num_labels
if self.framework == "pt":
probs = model_outputs.logits.sigmoid()[0]
scores, ids = probs.topk(top_k)
else:
raise ValueError(f"Unsupported framework: {self.framework}")
scores = scores.tolist()
ids = ids.tolist()
return [{"score": score, "answer": self.model.config.id2label[_id]} for score, _id in zip(scores, ids)]
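As a small sketch (not from the source) of the input formats routed through `__call__` above, reusing the checkpoint and image URL from the class docstring:
```
from transformers import pipeline

vqa = pipeline("visual-question-answering", model="dandelin/vilt-b32-finetuned-vqa")
image_url = "https://huggingface.co/datasets/Narsil/image_dummy/raw/main/lena.png"

# keyword form and dict form are both accepted by __call__
print(vqa(image=image_url, question="What is she wearing ?", top_k=1))
print(vqa({"image": image_url, "question": "Is this a person ?"}, top_k=1))
```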
.\pipelines\zero_shot_audio_classification.py
from collections import UserDict
from typing import Union
import numpy as np
import requests
from ..utils import (
add_end_docstrings,
logging,
)
from .audio_classification import ffmpeg_read
from .base import Pipeline, build_pipeline_init_args
logger = logging.get_logger(__name__)
@add_end_docstrings(build_pipeline_init_args(has_feature_extractor=True, has_tokenizer=True))
class ZeroShotAudioClassificationPipeline(Pipeline):
"""
Zero shot audio classification pipeline using `ClapModel`. This pipeline predicts the class of an audio when you
provide an audio and a set of `candidate_labels`.
Example:
```
>>> from transformers import pipeline
>>> from datasets import load_dataset
>>> dataset = load_dataset("ashraq/esc50")
>>> audio = next(iter(dataset["train"]["audio"]))["array"]
>>> classifier = pipeline(task="zero-shot-audio-classification", model="laion/clap-htsat-unfused")
>>> classifier(audio, candidate_labels=["Sound of a dog", "Sound of vacuum cleaner"])
[{'score': 0.9996, 'label': 'Sound of a dog'}, {'score': 0.0004, 'label': 'Sound of vacuum cleaner'}]
```
Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial) This audio
classification pipeline can currently be loaded from [`pipeline`] using the following task identifier:
`"zero-shot-audio-classification"`. See the list of available models on
[huggingface.co/models](https://huggingface.co/models?filter=zero-shot-audio-classification).
"""
def __init__(self, **kwargs):
super().__init__(**kwargs)
if self.framework != "pt":
raise ValueError(f"The {self.__class__} is only available in PyTorch.")
def __call__(self, audios: Union[np.ndarray, bytes, str], **kwargs):
"""
Assign labels to the audio(s) passed as inputs.
Args:
audios (`str`, `List[str]`, `np.array` or `List[np.array]`):
The pipeline handles three types of inputs:
- A string containing a http link pointing to an audio
- A string containing a local path to an audio
- An audio loaded in numpy
candidate_labels (`List[str]`):
The candidate labels for this audio
hypothesis_template (`str`, *optional*, defaults to `"This is a sound of {}"`):
The sentence used in conjunction with *candidate_labels* to attempt the audio classification by
replacing the placeholder with the candidate_labels. Then likelihood is estimated by using
logits_per_audio
Return:
A list of dictionaries containing result, one dictionary per proposed label. The dictionaries contain the
following keys:
- **label** (`str`) -- The label identified by the model. It is one of the suggested `candidate_label`.
- **score** (`float`) -- The score attributed by the model for that label (between 0 and 1).
"""
return super().__call__(audios, **kwargs)
def _sanitize_parameters(self, **kwargs):
preprocess_params = {}
if "candidate_labels" in kwargs:
preprocess_params["candidate_labels"] = kwargs["candidate_labels"]
if "hypothesis_template" in kwargs:
preprocess_params["hypothesis_template"] = kwargs["hypothesis_template"]
return preprocess_params, {}, {}
def preprocess(self, audio, candidate_labels=None, hypothesis_template="This is a sound of {}."):
if isinstance(audio, str):
if audio.startswith("http://") or audio.startswith("https://"):
audio = requests.get(audio).content
else:
with open(audio, "rb") as f:
audio = f.read()
if isinstance(audio, bytes):
audio = ffmpeg_read(audio, self.feature_extractor.sampling_rate)
if not isinstance(audio, np.ndarray):
raise ValueError("We expect a numpy ndarray as input")
if len(audio.shape) != 1:
raise ValueError("We expect a single channel audio input for ZeroShotAudioClassificationPipeline")
inputs = self.feature_extractor(
[audio], sampling_rate=self.feature_extractor.sampling_rate, return_tensors="pt"
)
inputs["candidate_labels"] = candidate_labels
sequences = [hypothesis_template.format(x) for x in candidate_labels]
text_inputs = self.tokenizer(sequences, return_tensors=self.framework, padding=True)
inputs["text_inputs"] = [text_inputs]
return inputs
def _forward(self, model_inputs):
candidate_labels = model_inputs.pop("candidate_labels")
text_inputs = model_inputs.pop("text_inputs")
if isinstance(text_inputs[0], UserDict):
text_inputs = text_inputs[0]
else:
text_inputs = text_inputs[0][0]
outputs = self.model(**text_inputs, **model_inputs)
model_outputs = {
"candidate_labels": candidate_labels,
"logits": outputs.logits_per_audio,
}
return model_outputs
def postprocess(self, model_outputs):
candidate_labels = model_outputs.pop("candidate_labels")
logits = model_outputs["logits"][0]
if self.framework == "pt":
probs = logits.softmax(dim=0)
scores = probs.tolist()
else:
raise ValueError("`tf` framework not supported.")
result = [
{"score": score, "label": candidate_label}
for score, candidate_label in sorted(zip(scores, candidate_labels), key=lambda x: -x[0])
]
return result
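The postprocess step above is just a softmax of `logits_per_audio` over the candidate labels. A toy standalone sketch (illustrative numbers, not real model output):
```
import numpy as np

# stand-in values for logits_per_audio against two candidate labels
logits = np.array([4.2, -1.3])
labels = ["Sound of a dog", "Sound of vacuum cleaner"]

# numerically stable softmax over the label axis
probs = np.exp(logits - logits.max())
probs /= probs.sum()
print(sorted(zip(probs.tolist(), labels), reverse=True))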
.\pipelines\zero_shot_classification.py
import inspect
from typing import List, Union
import numpy as np
from ..tokenization_utils import TruncationStrategy
from ..utils import add_end_docstrings, logging
from .base import ArgumentHandler, ChunkPipeline, build_pipeline_init_args
logger = logging.get_logger(__name__)
class ZeroShotClassificationArgumentHandler(ArgumentHandler):
"""
Handles arguments for zero-shot for text classification by turning each possible label into an NLI
premise/hypothesis pair.
"""
def _parse_labels(self, labels):
if isinstance(labels, str):
labels = [label.strip() for label in labels.split(",") if label.strip()]
return labels
def __call__(self, sequences, labels, hypothesis_template):
if len(labels) == 0 or len(sequences) == 0:
raise ValueError("You must include at least one label and at least one sequence.")
if hypothesis_template.format(labels[0]) == hypothesis_template:
raise ValueError(
(
'The provided hypothesis_template "{}" was not able to be formatted with the target labels. '
"Make sure the passed template includes formatting syntax such as {{}} where the label should go."
).format(hypothesis_template)
)
if isinstance(sequences, str):
sequences = [sequences]
sequence_pairs = []
for sequence in sequences:
sequence_pairs.extend([[sequence, hypothesis_template.format(label)] for label in labels])
return sequence_pairs, sequences
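To illustrate what the argument handler returns, here is a standalone sketch (not from the source) of the premise/hypothesis pairs built from a template:
```
sequence = "Who are you voting for in 2020?"
labels = ["politics", "economics"]
hypothesis_template = "This example is {}."

# one [premise, hypothesis] pair per candidate label
sequence_pairs = [[sequence, hypothesis_template.format(label)] for label in labels]
print(sequence_pairs)
# [['Who are you voting for in 2020?', 'This example is politics.'],
#  ['Who are you voting for in 2020?', 'This example is economics.']]
```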
@add_end_docstrings(build_pipeline_init_args(has_tokenizer=True))
class ZeroShotClassificationPipeline(ChunkPipeline):
"""
NLI-based zero-shot classification pipeline using a `ModelForSequenceClassification` trained on NLI (natural
language inference) tasks. Equivalent of `text-classification` pipelines, but these models don't require a
hardcoded number of potential classes, they can be chosen at runtime. It usually means it's slower but it is
**much** more flexible.
Any combination of sequences and labels can be passed and each combination will be posed as a premise/hypothesis
pair and passed to the pretrained model. Then, the logit for *entailment* is taken as the logit for the candidate
label being valid. Any NLI model can be used, but the id of the *entailment* label must be included in the model
config's :attr:*~transformers.PretrainedConfig.label2id*.
Example:
```
>>> from transformers import pipeline
>>> oracle = pipeline(model="facebook/bart-large-mnli")
>>> oracle(
... "I have a problem with my iphone that needs to be resolved asap!!",
... candidate_labels=["urgent", "not urgent", "phone", "tablet", "computer"],
... )
"""
pass
{'sequence': 'I have a problem with my iphone that needs to be resolved asap!!', 'labels': ['urgent', 'phone', 'computer', 'not urgent', 'tablet'], 'scores': [0.504, 0.479, 0.013, 0.003, 0.002]}
>>> oracle(
... "I have a problem with my iphone that needs to be resolved asap!!",
... candidate_labels=["english", "german"],
... )
{'sequence': 'I have a problem with my iphone that needs to be resolved asap!!', 'labels': ['english', 'german'], 'scores': [0.814, 0.186]}
Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial)
This NLI pipeline can currently be loaded from [`pipeline`] using the following task identifier:
`"zero-shot-classification"`.
The models that this pipeline can use are models that have been fine-tuned on an NLI task. See the up-to-date list
of available models on [huggingface.co/models](https://huggingface.co/models?search=nli).
def __init__(self, args_parser=ZeroShotClassificationArgumentHandler(), *args, **kwargs):
self._args_parser = args_parser
super().__init__(*args, **kwargs)
if self.entailment_id == -1:
logger.warning(
"Failed to determine 'entailment' label id from the label2id mapping in the model config. Setting to "
"-1. Define a descriptive label2id mapping in the model config to ensure correct outputs."
)
@property
def entailment_id(self):
for label, ind in self.model.config.label2id.items():
if label.lower().startswith("entail"):
return ind
return -1
def _parse_and_tokenize(
self, sequence_pairs, padding=True, add_special_tokens=True, truncation=TruncationStrategy.ONLY_FIRST, **kwargs
):
"""
Parse arguments and tokenize only_first so that hypothesis (label) is not truncated
"""
return_tensors = self.framework
if self.tokenizer.pad_token is None:
logger.error(
"Tokenizer was not supporting padding necessary for zero-shot, attempting to use "
" `pad_token=eos_token`"
)
self.tokenizer.pad_token = self.tokenizer.eos_token
try:
inputs = self.tokenizer(
sequence_pairs,
add_special_tokens=add_special_tokens,
return_tensors=return_tensors,
padding=padding,
truncation=truncation,
)
except Exception as e:
if "too short" in str(e):
inputs = self.tokenizer(
sequence_pairs,
add_special_tokens=add_special_tokens,
return_tensors=return_tensors,
padding=padding,
truncation=TruncationStrategy.DO_NOT_TRUNCATE,
)
else:
raise e
return inputs
def _sanitize_parameters(self, **kwargs):
if kwargs.get("multi_class", None) is not None:
kwargs["multi_label"] = kwargs["multi_class"]
logger.warning(
"The `multi_class` argument has been deprecated and renamed to `multi_label`. "
"`multi_class` will be removed in a future version of Transformers."
)
preprocess_params = {}
if "candidate_labels" in kwargs:
preprocess_params["candidate_labels"] = self._args_parser._parse_labels(kwargs["candidate_labels"])
if "hypothesis_template" in kwargs:
preprocess_params["hypothesis_template"] = kwargs["hypothesis_template"]
postprocess_params = {}
if "multi_label" in kwargs:
postprocess_params["multi_label"] = kwargs["multi_label"]
return preprocess_params, {}, postprocess_params
def __call__(
self,
sequences: Union[str, List[str]],
*args,
**kwargs,
):
"""
Classify the sequence(s) given as inputs using a zero-shot classification pipeline.
Args:
sequences (`str` or `List[str]`):
The sequence(s) to classify, will be truncated if the model input is too large.
candidate_labels (`str` or `List[str]`):
The set of possible class labels to classify each sequence into. Can be a single label, a string of
comma-separated labels, or a list of labels.
hypothesis_template (`str`, *optional*, defaults to `"This example is {}."`):
The template used to turn each label into an NLI-style hypothesis. This template must include a {} or
similar syntax for the candidate label to be inserted into the template. For example, the default
template is `"This example is {}."` With the candidate label `"sports"`, this would be fed into the
model like `"<cls> sequence to classify <sep> This example is sports . <sep>"`. The default template
works well in many cases, but it may be worthwhile to experiment with different templates depending on
the task setting.
multi_label (`bool`, *optional*, defaults to `False`):
Whether or not multiple candidate labels can be true. If `False`, the scores are normalized such that
the sum of the label likelihoods for each sequence is 1. If `True`, the labels are considered
independent and probabilities are normalized for each candidate by doing a softmax of the entailment
score vs. the contradiction score.
Return:
A `dict` or a list of `dict`: Each result comes as a dictionary with the following keys:
- **sequence** (`str`) -- The sequence for which this is the output.
- **labels** (`List[str]`) -- The labels sorted by order of likelihood.
- **scores** (`List[float]`) -- The probabilities for each of the labels.
"""
if len(args) == 0:
pass
elif len(args) == 1 and "candidate_labels" not in kwargs:
kwargs["candidate_labels"] = args[0]
else:
raise ValueError(f"Unable to understand extra arguments {args}")
return super().__call__(sequences, **kwargs)
def preprocess(self, inputs, candidate_labels=None, hypothesis_template="This example is {}."):
sequence_pairs, sequences = self._args_parser(inputs, candidate_labels, hypothesis_template)
for i, (candidate_label, sequence_pair) in enumerate(zip(candidate_labels, sequence_pairs)):
model_input = self._parse_and_tokenize([sequence_pair])
yield {
"candidate_label": candidate_label,
"sequence": sequences[0],
"is_last": i == len(candidate_labels) - 1,
**model_input,
}
def _forward(self, inputs):
candidate_label = inputs["candidate_label"]
sequence = inputs["sequence"]
model_inputs = {k: inputs[k] for k in self.tokenizer.model_input_names}
model_forward = self.model.forward if self.framework == "pt" else self.model.call
if "use_cache" in inspect.signature(model_forward).parameters.keys():
model_inputs["use_cache"] = False
outputs = self.model(**model_inputs)
model_outputs = {
"candidate_label": candidate_label,
"sequence": sequence,
"is_last": inputs["is_last"],
**outputs,
}
return model_outputs
def postprocess(self, model_outputs, multi_label=False):
candidate_labels = [outputs["candidate_label"] for outputs in model_outputs]
sequences = [outputs["sequence"] for outputs in model_outputs]
logits = np.concatenate([output["logits"].numpy() for output in model_outputs])
N = logits.shape[0]
n = len(candidate_labels)
num_sequences = N // n
reshaped_outputs = logits.reshape((num_sequences, n, -1))
if multi_label or len(candidate_labels) == 1:
entailment_id = self.entailment_id
contradiction_id = -1 if entailment_id == 0 else 0
entail_contr_logits = reshaped_outputs[..., [contradiction_id, entailment_id]]
scores = np.exp(entail_contr_logits) / np.exp(entail_contr_logits).sum(-1, keepdims=True)
scores = scores[..., 1]
else:
entail_logits = reshaped_outputs[..., self.entailment_id]
scores = np.exp(entail_logits) / np.exp(entail_logits).sum(-1, keepdims=True)
top_inds = list(reversed(scores[0].argsort()))
return {
"sequence": sequences[0],
"labels": [candidate_labels[i] for i in top_inds],
"scores": scores[0, top_inds].tolist(),
}
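The two scoring modes in `postprocess` above can be reproduced with plain numpy. A toy sketch (assuming, as in typical MNLI label orders, that contradiction is index 0 and entailment index 2):
```
import numpy as np

# shape (1 sequence, 2 candidate labels, 3 NLI classes); illustrative values only
logits = np.array([[[-2.0, 0.1, 3.0],    # sequence vs. label 1
                    [ 1.5, 0.0, -1.0]]]) # sequence vs. label 2

# multi_label=True: per label, softmax over (contradiction, entailment), keep entailment prob
ec = logits[..., [0, 2]]
multi = np.exp(ec) / np.exp(ec).sum(-1, keepdims=True)
print(multi[..., 1])   # independent probability per label

# multi_label=False: softmax of the entailment logits across labels
ent = logits[..., 2]
single = np.exp(ent) / np.exp(ent).sum(-1, keepdims=True)
print(single)          # probabilities summing to 1 across labels
```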
.\pipelines\zero_shot_image_classification.py
from collections import UserDict
from typing import List, Union
from ..utils import (
add_end_docstrings,
is_tf_available,
is_torch_available,
is_vision_available,
logging,
requires_backends,
)
from .base import Pipeline, build_pipeline_init_args
if is_vision_available():
from PIL import Image
from ..image_utils import load_image
if is_torch_available():
import torch
from ..models.auto.modeling_auto import MODEL_FOR_ZERO_SHOT_IMAGE_CLASSIFICATION_MAPPING_NAMES
if is_tf_available():
from ..models.auto.modeling_tf_auto import TF_MODEL_FOR_ZERO_SHOT_IMAGE_CLASSIFICATION_MAPPING_NAMES
from ..tf_utils import stable_softmax
logger = logging.get_logger(__name__)
@add_end_docstrings(build_pipeline_init_args(has_image_processor=True))
class ZeroShotImageClassificationPipeline(Pipeline):
"""
Zero shot image classification pipeline using `CLIPModel`. This pipeline predicts the class of an image when you
provide an image and a set of `candidate_labels`.
Example:
```
>>> from transformers import pipeline
>>> classifier = pipeline(model="google/siglip-so400m-patch14-384")
>>> classifier(
... "https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png",
... candidate_labels=["animals", "humans", "landscape"],
... )
[{'score': 0.965, 'label': 'animals'}, {'score': 0.03, 'label': 'humans'}, {'score': 0.005, 'label': 'landscape'}]
>>> classifier(
... "https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png",
... candidate_labels=["black and white", "photorealist", "painting"],
... )
[{'score': 0.996, 'label': 'black and white'}, {'score': 0.003, 'label': 'photorealist'}, {'score': 0.0, 'label': 'painting'}]
```
Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial)
This image classification pipeline can currently be loaded from [`pipeline`] using the following task identifier:
`"zero-shot-image-classification"`.
See the list of available models on
[huggingface.co/models](https://huggingface.co/models?filter=zero-shot-image-classification).
"""
def __init__(self, **kwargs):
super().__init__(**kwargs)
requires_backends(self, "vision")
self.check_model_type(
TF_MODEL_FOR_ZERO_SHOT_IMAGE_CLASSIFICATION_MAPPING_NAMES
if self.framework == "tf"
else MODEL_FOR_ZERO_SHOT_IMAGE_CLASSIFICATION_MAPPING_NAMES
)
def __call__(self, images: Union[str, List[str], "Image", List["Image"]], **kwargs):
"""
Assign labels to the image(s) passed as inputs.
Args:
images (`str`, `List[str]`, `PIL.Image` or `List[PIL.Image]`):
The pipeline handles three types of images:
- A string containing an http link pointing to an image
- A string containing a local path to an image
- An image loaded in PIL directly
candidate_labels (`List[str]`):
The candidate labels for this image.
hypothesis_template (`str`, *optional*, defaults to `"This is a photo of {}"`):
The sentence used in conjunction with *candidate_labels* to attempt the image classification by replacing the placeholder with the candidate_labels.
The likelihood is then estimated using logits_per_image.
timeout (`float`, *optional*, defaults to None):
The maximum time in seconds to wait for fetching images from the web. If None, no timeout is set and the call may block forever.
Return:
A list of dictionaries containing one result per proposed label. Each dictionary contains the following keys:
- **label** (`str`) -- The label identified by the model. It is one of the suggested `candidate_labels`.
- **score** (`float`) -- The score attributed by the model to that label (between 0 and 1).
"""
return super().__call__(images, **kwargs)
def _sanitize_parameters(self, **kwargs):
preprocess_params = {}
if "candidate_labels" in kwargs:
preprocess_params["candidate_labels"] = kwargs["candidate_labels"]
if "timeout" in kwargs:
preprocess_params["timeout"] = kwargs["timeout"]
if "hypothesis_template" in kwargs:
preprocess_params["hypothesis_template"] = kwargs["hypothesis_template"]
return preprocess_params, {}, {}
def preprocess(self, image, candidate_labels=None, hypothesis_template="This is a photo of {}.", timeout=None):
"""
Preprocess the image and its associated parameters.
Args:
image: the image data
candidate_labels (`List[str]`, *optional*): candidate labels for the image
hypothesis_template (`str`, *optional*, defaults to `"This is a photo of {}."`):
template used to build the hypothesis sentences by filling in the placeholder with each candidate label
timeout (`float`, *optional*): maximum time in seconds to wait when fetching the image from the web
Returns:
inputs: a dictionary containing the preprocessed data
"""
image = load_image(image, timeout=timeout)
inputs = self.image_processor(images=[image], return_tensors=self.framework)
inputs["candidate_labels"] = candidate_labels
sequences = [hypothesis_template.format(x) for x in candidate_labels]
padding = "max_length" if self.model.config.model_type == "siglip" else True
text_inputs = self.tokenizer(sequences, return_tensors=self.framework, padding=padding)
inputs["text_inputs"] = [text_inputs]
return inputs
def _forward(self, model_inputs):
candidate_labels = model_inputs.pop("candidate_labels")
text_inputs = model_inputs.pop("text_inputs")
if isinstance(text_inputs[0], UserDict):
text_inputs = text_inputs[0]
else:
text_inputs = text_inputs[0][0]
outputs = self.model(**text_inputs, **model_inputs)
model_outputs = {
"candidate_labels": candidate_labels,
"logits": outputs.logits_per_image,
}
return model_outputs
def postprocess(self, model_outputs):
candidate_labels = model_outputs.pop("candidate_labels")
logits = model_outputs["logits"][0]
if self.framework == "pt" and self.model.config.model_type == "siglip":
probs = torch.sigmoid(logits).squeeze(-1)
scores = probs.tolist()
if not isinstance(scores, list):
scores = [scores]
elif self.framework == "pt":
probs = logits.softmax(dim=-1).squeeze(-1)
scores = probs.tolist()
if not isinstance(scores, list):
scores = [scores]
elif self.framework == "tf":
probs = stable_softmax(logits, axis=-1)
scores = probs.numpy().tolist()
else:
raise ValueError(f"Unsupported framework: {self.framework}")
result = [
{"score": score, "label": candidate_label}
for score, candidate_label in sorted(zip(scores, candidate_labels), key=lambda x: -x[0])
]
return result
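For the softmax branch of `postprocess` above (i.e. a non-SigLIP model), here is a minimal usage sketch (not part of the source); it assumes the `openai/clip-vit-base-patch32` checkpoint is available and reuses the image URL from the class docstring.
```
from transformers import pipeline

clip_classifier = pipeline("zero-shot-image-classification", model="openai/clip-vit-base-patch32")
out = clip_classifier(
    "https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png",
    candidate_labels=["animals", "humans", "landscape"],
    hypothesis_template="A photo of {}.",
)
print(out)  # scores sum to 1 because CLIP logits go through a softmax over the labels
```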
.\pipelines\zero_shot_object_detection.py
from typing import Any, Dict, List, Union
from ..utils import add_end_docstrings, is_torch_available, is_vision_available, logging, requires_backends
from .base import ChunkPipeline, build_pipeline_init_args
if is_vision_available():
from PIL import Image
from ..image_utils import load_image
if is_torch_available():
import torch
from transformers.modeling_outputs import BaseModelOutput
from ..models.auto.modeling_auto import MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING_NAMES
logger = logging.get_logger(__name__)
@add_end_docstrings(build_pipeline_init_args(has_image_processor=True))
class ZeroShotObjectDetectionPipeline(ChunkPipeline):
"""
Zero shot object detection pipeline using `OwlViTForObjectDetection`. This pipeline predicts bounding boxes of
objects when you provide an image and a set of `candidate_labels`.
Example:
```
>>> from transformers import pipeline
>>> detector = pipeline(model="google/owlvit-base-patch32", task="zero-shot-object-detection")
>>> detector(
... "http://images.cocodataset.org/val2017/000000039769.jpg",
... candidate_labels=["cat", "couch"],
... )
[{'score': 0.287, 'label': 'cat', 'box': {'xmin': 324, 'ymin': 20, 'xmax': 640, 'ymax': 373}}, {'score': 0.254, 'label': 'cat', 'box': {'xmin': 1, 'ymin': 55, 'xmax': 315, 'ymax': 472}}, {'score': 0.121, 'label': 'couch', 'box': {'xmin': 4, 'ymin': 0, 'xmax': 642, 'ymax': 476}}]
>>> detector(
... "https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png",
... candidate_labels=["head", "bird"],
... )
[{'score': 0.119, 'label': 'bird', 'box': {'xmin': 71, 'ymin': 170, 'xmax': 410, 'ymax': 508}}]
```
Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial)
This object detection pipeline can currently be loaded from [`pipeline`] using the following task identifier:
`"zero-shot-object-detection"`.
See the list of available models on
[huggingface.co/models](https://huggingface.co/models?filter=zero-shot-object-detection).
"""
def __init__(self, **kwargs):
super().__init__(**kwargs)
if self.framework == "tf":
raise ValueError(f"The {self.__class__} is only available in PyTorch.")
requires_backends(self, "vision")
self.check_model_type(MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING_NAMES)
def __call__(
self,
image: Union[str, "Image.Image", List[Dict[str, Any]]],
candidate_labels: Union[str, List[str]] = None,
**kwargs,
):
if "text_queries" in kwargs:
candidate_labels = kwargs.pop("text_queries")
if isinstance(image, (str, Image.Image)):
inputs = {"image": image, "candidate_labels": candidate_labels}
else:
inputs = image
results = super().__call__(inputs, **kwargs)
return results
def _sanitize_parameters(self, **kwargs):
preprocess_params = {}
if "timeout" in kwargs:
preprocess_params["timeout"] = kwargs["timeout"]
postprocess_params = {}
if "threshold" in kwargs:
postprocess_params["threshold"] = kwargs["threshold"]
if "top_k" in kwargs:
postprocess_params["top_k"] = kwargs["top_k"]
return preprocess_params, {}, postprocess_params
def preprocess(self, inputs, timeout=None):
image = load_image(inputs["image"], timeout=timeout)
candidate_labels = inputs["candidate_labels"]
if isinstance(candidate_labels, str):
candidate_labels = candidate_labels.split(",")
target_size = torch.tensor([[image.height, image.width]], dtype=torch.int32)
for i, candidate_label in enumerate(candidate_labels):
text_inputs = self.tokenizer(candidate_label, return_tensors=self.framework)
image_features = self.image_processor(image, return_tensors=self.framework)
yield {
"is_last": i == len(candidate_labels) - 1,
"target_size": target_size,
"candidate_label": candidate_label,
**text_inputs,
**image_features,
}
def _forward(self, model_inputs):
target_size = model_inputs.pop("target_size")
candidate_label = model_inputs.pop("candidate_label")
is_last = model_inputs.pop("is_last")
outputs = self.model(**model_inputs)
model_outputs = {"target_size": target_size, "candidate_label": candidate_label, "is_last": is_last, **outputs}
return model_outputs
def postprocess(self, model_outputs, threshold=0.1, top_k=None):
results = []
for model_output in model_outputs:
label = model_output["candidate_label"]
model_output = BaseModelOutput(model_output)
outputs = self.image_processor.post_process_object_detection(
outputs=model_output, threshold=threshold, target_sizes=model_output["target_size"]
)[0]
for index in outputs["scores"].nonzero():
score = outputs["scores"][index].item()
box = self._get_bounding_box(outputs["boxes"][index][0])
result = {"score": score, "label": label, "box": box}
results.append(result)
results = sorted(results, key=lambda x: x["score"], reverse=True)
if top_k:
results = results[:top_k]
return results
def _get_bounding_box(self, box: "torch.Tensor") -> Dict[str, int]:
"""
Turns list [xmin, ymin, xmax, ymax] into dict { "xmin": xmin, ... }
Args:
box (`torch.Tensor`): Tensor containing the coordinates in corners format.
Returns:
bbox (`Dict[str, int]`): Dict containing the coordinates in corners format.
"""
if self.framework != "pt":
raise ValueError("The ZeroShotObjectDetectionPipeline is only available in PyTorch.")
xmin, ymin, xmax, ymax = box.int().tolist()
bbox = {
"xmin": xmin,
"ymin": ymin,
"xmax": xmax,
"ymax": ymax,
}
return bbox
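A short usage sketch of the chunk pipeline above; the `threshold` and `top_k` values are illustrative and are routed through `_sanitize_parameters` to `postprocess`:
```
from transformers import pipeline

detector = pipeline("zero-shot-object-detection", model="google/owlvit-base-patch32")
detections = detector(
    "http://images.cocodataset.org/val2017/000000039769.jpg",
    candidate_labels=["cat", "remote control"],
    threshold=0.2,  # drop boxes scoring below 0.2
    top_k=5,        # keep at most the 5 best boxes after sorting
)
for det in detections:
    # det["box"] is the dict produced by _get_bounding_box: {"xmin", "ymin", "xmax", "ymax"}
    print(det["label"], round(det["score"], 3), det["box"])
```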
.\pipelines\__init__.py
import json
import os
import warnings
from pathlib import Path
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
from huggingface_hub import model_info
from ..configuration_utils import PretrainedConfig
from ..dynamic_module_utils import get_class_from_dynamic_module
from ..feature_extraction_utils import PreTrainedFeatureExtractor
from ..image_processing_utils import BaseImageProcessor
from ..models.auto.configuration_auto import AutoConfig
from ..models.auto.feature_extraction_auto import FEATURE_EXTRACTOR_MAPPING, AutoFeatureExtractor
from ..models.auto.image_processing_auto import IMAGE_PROCESSOR_MAPPING, AutoImageProcessor
from ..models.auto.modeling_auto import AutoModelForDepthEstimation, AutoModelForImageToImage
from ..models.auto.tokenization_auto import TOKENIZER_MAPPING, AutoTokenizer
from ..tokenization_utils import PreTrainedTokenizer
from ..utils import (
CONFIG_NAME,
HUGGINGFACE_CO_RESOLVE_ENDPOINT,
cached_file,
extract_commit_hash,
find_adapter_config_file,
is_kenlm_available,
is_offline_mode,
is_peft_available,
is_pyctcdecode_available,
is_tf_available,
is_torch_available,
logging,
)
from .audio_classification import AudioClassificationPipeline
from .automatic_speech_recognition import AutomaticSpeechRecognitionPipeline
from .base import (
ArgumentHandler,
CsvPipelineDataFormat,
JsonPipelineDataFormat,
PipedPipelineDataFormat,
Pipeline,
PipelineDataFormat,
PipelineException,
PipelineRegistry,
get_default_model_and_revision,
infer_framework_load_model,
)
from .conversational import Conversation, ConversationalPipeline
from .depth_estimation import DepthEstimationPipeline
from .document_question_answering import DocumentQuestionAnsweringPipeline
from .feature_extraction import FeatureExtractionPipeline
from .fill_mask import FillMaskPipeline
from .image_classification import ImageClassificationPipeline
from .image_feature_extraction import ImageFeatureExtractionPipeline
from .image_segmentation import ImageSegmentationPipeline
from .image_to_image import ImageToImagePipeline
from .image_to_text import ImageToTextPipeline
from .mask_generation import MaskGenerationPipeline
from .object_detection import ObjectDetectionPipeline
from .question_answering import QuestionAnsweringArgumentHandler, QuestionAnsweringPipeline
from .table_question_answering import TableQuestionAnsweringArgumentHandler, TableQuestionAnsweringPipeline
from .text2text_generation import SummarizationPipeline, Text2TextGenerationPipeline, TranslationPipeline
from .text_classification import TextClassificationPipeline
from .text_generation import TextGenerationPipeline
from .text_to_audio import TextToAudioPipeline
from .token_classification import (
AggregationStrategy,
NerPipeline,
TokenClassificationArgumentHandler,
TokenClassificationPipeline,
)
from .video_classification import VideoClassificationPipeline
from .visual_question_answering import VisualQuestionAnsweringPipeline
from .zero_shot_audio_classification import ZeroShotAudioClassificationPipeline
from .zero_shot_classification import ZeroShotClassificationArgumentHandler, ZeroShotClassificationPipeline
from .zero_shot_image_classification import ZeroShotImageClassificationPipeline
from .zero_shot_object_detection import ZeroShotObjectDetectionPipeline
if is_tf_available():
import tensorflow as tf
from ..models.auto.modeling_tf_auto import (
TFAutoModel,
TFAutoModelForCausalLM,
TFAutoModelForImageClassification,
TFAutoModelForMaskedLM,
TFAutoModelForQuestionAnswering,
TFAutoModelForSeq2SeqLM,
TFAutoModelForSequenceClassification,
TFAutoModelForTableQuestionAnswering,
TFAutoModelForTokenClassification,
TFAutoModelForVision2Seq,
TFAutoModelForZeroShotImageClassification,
)
if is_torch_available():
import torch
from ..models.auto.modeling_auto import (
AutoModel,
AutoModelForAudioClassification,
AutoModelForCausalLM,
AutoModelForCTC,
AutoModelForDocumentQuestionAnswering,
AutoModelForImageClassification,
AutoModelForImageSegmentation,
AutoModelForMaskedLM,
AutoModelForMaskGeneration,
AutoModelForObjectDetection,
AutoModelForQuestionAnswering,
AutoModelForSemanticSegmentation,
AutoModelForSeq2SeqLM,
AutoModelForSequenceClassification,
AutoModelForSpeechSeq2Seq,
AutoModelForTableQuestionAnswering,
AutoModelForTextToSpectrogram,
AutoModelForTextToWaveform,
AutoModelForTokenClassification,
AutoModelForVideoClassification,
AutoModelForVision2Seq,
AutoModelForVisualQuestionAnswering,
AutoModelForZeroShotImageClassification,
AutoModelForZeroShotObjectDetection,
)
if TYPE_CHECKING:
from ..modeling_tf_utils import TFPreTrainedModel
from ..modeling_utils import PreTrainedModel
from ..tokenization_utils_fast import PreTrainedTokenizerFast
logger = logging.get_logger(__name__)
TASK_ALIASES = {
"sentiment-analysis": "text-classification",
"ner": "token-classification",
"vqa": "visual-question-answering",
"text-to-speech": "text-to-audio",
}
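Aliases are resolved before the task lookup. A quick sketch of what that mapping does (importing the module-level dict directly, which is an implementation detail rather than public API):
```
from transformers.pipelines import TASK_ALIASES

task = "sentiment-analysis"
task = TASK_ALIASES.get(task, task)
print(task)  # "text-classification"
```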
SUPPORTED_TASKS = {
"audio-classification": {
"impl": AudioClassificationPipeline,
"tf": (),
"pt": (AutoModelForAudioClassification,) if is_torch_available() else (),
"default": {"model": {"pt": ("superb/wav2vec2-base-superb-ks", "372e048")}},
"type": "audio",
},
"automatic-speech-recognition": {
"impl": AutomaticSpeechRecognitionPipeline,
"tf": (),
"pt": (AutoModelForCTC, AutoModelForSpeechSeq2Seq) if is_torch_available() else (),
"default": {"model": {"pt": ("facebook/wav2vec2-base-960h", "55bb623")}},
"type": "multimodal",
},
"text-to-audio": {
"impl": TextToAudioPipeline,
"tf": (),
"pt": (AutoModelForTextToWaveform, AutoModelForTextToSpectrogram) if is_torch_available() else (),
"default": {"model": {"pt": ("suno/bark-small", "645cfba")}},
"type": "text",
},
"feature-extraction": {
"impl": FeatureExtractionPipeline,
"tf": (TFAutoModel,) if is_tf_available() else (),
"pt": (AutoModel,) if is_torch_available() else (),
"default": {
"model": {
"pt": ("distilbert/distilbert-base-cased", "935ac13"),
"tf": ("distilbert/distilbert-base-cased", "935ac13"),
}
},
"type": "multimodal",
},
"text-classification": {
"impl": TextClassificationPipeline,
"tf": (TFAutoModelForSequenceClassification,) if is_tf_available() else (),
"pt": (AutoModelForSequenceClassification,) if is_torch_available() else (),
"default": {
"model": {
"pt": ("distilbert/distilbert-base-uncased-finetuned-sst-2-english", "af0f99b"),
"tf": ("distilbert/distilbert-base-uncased-finetuned-sst-2-english", "af0f99b"),
},
},
"type": "text",
},
"token-classification": {
"impl": TokenClassificationPipeline,
"tf": (TFAutoModelForTokenClassification,) if is_tf_available() else (),
"pt": (AutoModelForTokenClassification,) if is_torch_available() else (),
"default": {
"model": {
"pt": ("dbmdz/bert-large-cased-finetuned-conll03-english", "f2482bf"),
"tf": ("dbmdz/bert-large-cased-finetuned-conll03-english", "f2482bf"),
},
},
"type": "text",
},
"question-answering": {
"impl": QuestionAnsweringPipeline,
"tf": (TFAutoModelForQuestionAnswering,) if is_tf_available() else (),
"pt": (AutoModelForQuestionAnswering,) if is_torch_available() else (),
"default": {
"model": {
"pt": ("distilbert/distilbert-base-cased-distilled-squad", "626af31"),
"tf": ("distilbert/distilbert-base-cased-distilled-squad", "626af31"),
},
},
"type": "text",
},
"table-question-answering": {
"impl": TableQuestionAnsweringPipeline,
"pt": (AutoModelForTableQuestionAnswering,) if is_torch_available() else (),
"tf": (TFAutoModelForTableQuestionAnswering,) if is_tf_available() else (),
"default": {
"model": {
"pt": ("google/tapas-base-finetuned-wtq", "69ceee2"),
"tf": ("google/tapas-base-finetuned-wtq", "69ceee2"),
},
},
"type": "text",
},
"visual-question-answering": {
"impl": VisualQuestionAnsweringPipeline,
"pt": (AutoModelForVisualQuestionAnswering,) if is_torch_available() else (),
"tf": (),
"default": {
"model": {
"pt": ("dandelin/vilt-b32-finetuned-vqa", "4355f59"),
},
},
"type": "multimodal",
},
"document-question-answering": {
"impl": DocumentQuestionAnsweringPipeline,
"pt": (AutoModelForDocumentQuestionAnswering,) if is_torch_available() else (),
"tf": (),
"default": {
"model": {
"pt": ("impira/layoutlm-document-qa", "52e01b3"),
},
},
"type": "multimodal",
},
"fill-mask": {
"impl": FillMaskPipeline,
"tf": (TFAutoModelForMaskedLM,) if is_tf_available() else (),
"pt": (AutoModelForMaskedLM,) if is_torch_available() else (),
"default": {
"model": {
"pt": ("distilbert/distilroberta-base", "ec58a5b"),
"tf": ("distilbert/distilroberta-base", "ec58a5b"),
}
},
"type": "text",
},
"summarization": {
"impl": SummarizationPipeline,
"tf": (TFAutoModelForSeq2SeqLM,) if is_tf_available() else (),
"pt": (AutoModelForSeq2SeqLM,) if is_torch_available() else (),
"default": {
"model": {
"pt": ("sshleifer/distilbart-cnn-12-6", "a4f8f3e"),
"tf": ("google-t5/t5-small", "d769bba")
}
},
"type": "text",
},
"translation": {
"impl": TranslationPipeline,
"tf": (TFAutoModelForSeq2SeqLM,) if is_tf_available() else (),
"pt": (AutoModelForSeq2SeqLM,) if is_torch_available() else (),
"default": {
("en", "fr"): {"model": {"pt": ("google-t5/t5-base", "686f1db"), "tf": ("google-t5/t5-base", "686f1db")}},
("en", "de"): {"model": {"pt": ("google-t5/t5-base", "686f1db"), "tf": ("google-t5/t5-base", "686f1db")}},
("en", "ro"): {"model": {"pt": ("google-t5/t5-base", "686f1db"), "tf": ("google-t5/t5-base", "686f1db")}},
},
"type": "text",
},
"text2text-generation": {
"impl": Text2TextGenerationPipeline,
"tf": (TFAutoModelForSeq2SeqLM,) if is_tf_available() else (),
"pt": (AutoModelForSeq2SeqLM,) if is_torch_available() else (),
"default": {"model": {"pt": ("google-t5/t5-base", "686f1db"), "tf": ("google-t5/t5-base", "686f1db")}},
"type": "text",
},
"text-generation": {
"impl": TextGenerationPipeline,
"tf": (TFAutoModelForCausalLM,) if is_tf_available() else (),
"pt": (AutoModelForCausalLM,) if is_torch_available() else (),
"default": {"model": {"pt": ("openai-community/gpt2", "6c0e608"), "tf": ("openai-community/gpt2", "6c0e608")}},
"type": "text",
},
"zero-shot-classification": {
"impl": ZeroShotClassificationPipeline,
"tf": (TFAutoModelForSequenceClassification,) if is_tf_available() else (),
"pt": (AutoModelForSequenceClassification,) if is_torch_available() else (),
"default": {
"model": {
"pt": ("facebook/bart-large-mnli", "c626438"),
"tf": ("FacebookAI/roberta-large-mnli", "130fb28"),
},
"config": {
"pt": ("facebook/bart-large-mnli", "c626438"),
"tf": ("FacebookAI/roberta-large-mnli", "130fb28"),
},
},
"type": "text",
},
"zero-shot-image-classification": {
"impl": ZeroShotImageClassificationPipeline,
"tf": (TFAutoModelForZeroShotImageClassification,) if is_tf_available() else (),
"pt": (AutoModelForZeroShotImageClassification,) if is_torch_available() else (),
"default": {
"model": {
"pt": ("openai/clip-vit-base-patch32", "f4881ba"),
"tf": ("openai/clip-vit-base-patch32", "f4881ba"),
}
},
"type": "multimodal",
},
"zero-shot-audio-classification": {
"impl": ZeroShotAudioClassificationPipeline,
"tf": (),
"pt": (AutoModel,) if is_torch_available() else (),
"default": {
"model": {
"pt": ("laion/clap-htsat-fused", "973b6e5"),
}
},
"type": "multimodal",
},
"conversational": {
"impl": ConversationalPipeline,
"tf": (TFAutoModelForSeq2SeqLM, TFAutoModelForCausalLM) if is_tf_available() else (),
"pt": (AutoModelForSeq2SeqLM, AutoModelForCausalLM) if is_torch_available() else (),
"default": {
"model": {"pt": ("microsoft/DialoGPT-medium", "8bada3b"), "tf": ("microsoft/DialoGPT-medium", "8bada3b")}
},
"type": "text",
},
"image-classification": {
"impl": ImageClassificationPipeline,
"tf": (TFAutoModelForImageClassification,) if is_tf_available() else (),
"pt": (AutoModelForImageClassification,) if is_torch_available() else (),
"default": {
"model": {
"pt": ("google/vit-base-patch16-224", "5dca96d"),
"tf": ("google/vit-base-patch16-224", "5dca96d"),
}
},
"type": "image",
},
"image-feature-extraction": {
"impl": ImageFeatureExtractionPipeline,
"tf": (TFAutoModel,) if is_tf_available() else (),
"pt": (AutoModel,) if is_torch_available() else (),
"default": {
"model": {
"pt": ("google/vit-base-patch16-224", "29e7a1e183"),
"tf": ("google/vit-base-patch16-224", "29e7a1e183"),
}
},
"type": "image",
},
"image-segmentation": {
"impl": ImageSegmentationPipeline,
"tf": (),
"pt": (AutoModelForImageSegmentation, AutoModelForSemanticSegmentation) if is_torch_available() else (),
"default": {"model": {"pt": ("facebook/detr-resnet-50-panoptic", "fc15262")}},
"type": "multimodal",
},
"image-to-text": {
"impl": ImageToTextPipeline,
"tf": (TFAutoModelForVision2Seq,) if is_tf_available() else (),
"pt": (AutoModelForVision2Seq,) if is_torch_available() else (),
"default": {
"model": {
"pt": ("ydshieh/vit-gpt2-coco-en", "65636df"),
"tf": ("ydshieh/vit-gpt2-coco-en", "65636df"),
}
},
"type": "multimodal",
},
"object-detection": {
"impl": ObjectDetectionPipeline,
"tf": (),
"pt": (AutoModelForObjectDetection,) if is_torch_available() else (),
"default": {"model": {"pt": ("facebook/detr-resnet-50", "2729413")}},
"type": "multimodal",
},
"zero-shot-object-detection": {
"impl": ZeroShotObjectDetectionPipeline,
"tf": (),
"pt": (AutoModelForZeroShotObjectDetection,) if is_torch_available() else (),
"default": {"model": {"pt": ("google/owlvit-base-patch32", "17740e1")}},
"type": "multimodal",
},
"depth-estimation": {
"impl": DepthEstimationPipeline,
"tf": (),
"pt": (AutoModelForDepthEstimation,) if is_torch_available() else (),
"default": {"model": {"pt": ("Intel/dpt-large", "e93beec")}},
"type": "image",
},
"video-classification": {
"impl": VideoClassificationPipeline,
"tf": (),
"pt": (AutoModelForVideoClassification,) if is_torch_available() else (),
"default": {"model": {"pt": ("MCG-NJU/videomae-base-finetuned-kinetics", "4800870")}},
"type": "video",
},
"mask-generation": {
"impl": MaskGenerationPipeline,
"tf": (),
"pt": (AutoModelForMaskGeneration,) if is_torch_available() else (),
"default": {"model": {"pt": ("facebook/sam-vit-huge", "997b15")}},
"type": "multimodal",
},
"image-to-image": {
"impl": ImageToImagePipeline,
"tf": (),
"pt": (AutoModelForImageToImage,) if is_torch_available() else (),
"default": {"model": {"pt": ("caidas/swin2SR-classical-sr-x2-64", "4aaedcb")}},
"type": "image",
},
}
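Each entry pairs an implementation class with per-framework auto-model classes and a pinned default checkpoint as a `(model_id, revision)` tuple. A small sketch of reading one entry (again relying on the module-level dict, which may change between versions):
```
from transformers.pipelines import SUPPORTED_TASKS

entry = SUPPORTED_TASKS["text-classification"]
model_id, revision = entry["default"]["model"]["pt"]
print(entry["impl"].__name__)  # TextClassificationPipeline
print(model_id, revision)      # distilbert/distilbert-base-uncased-finetuned-sst-2-english af0f99b
```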
NO_FEATURE_EXTRACTOR_TASKS = set()
NO_IMAGE_PROCESSOR_TASKS = set()
NO_TOKENIZER_TASKS = set()
MULTI_MODEL_AUDIO_CONFIGS = {"SpeechEncoderDecoderConfig"}
MULTI_MODEL_VISION_CONFIGS = {"VisionEncoderDecoderConfig", "VisionTextDualEncoderConfig"}
for task, values in SUPPORTED_TASKS.items():
if values["type"] == "text":
NO_FEATURE_EXTRACTOR_TASKS.add(task)
NO_IMAGE_PROCESSOR_TASKS.add(task)
elif values["type"] in {"image", "video"}:
NO_TOKENIZER_TASKS.add(task)
elif values["type"] in {"audio"}:
NO_TOKENIZER_TASKS.add(task)
NO_IMAGE_PROCESSOR_TASKS.add(task)
elif values["type"] != "multimodal":
raise ValueError(f"SUPPORTED_TASK {task} contains invalid type {values['type']}")
PIPELINE_REGISTRY = PipelineRegistry(supported_tasks=SUPPORTED_TASKS, task_aliases=TASK_ALIASES)
def get_supported_tasks() -> List[str]:
"""
Returns a list of supported task strings.
"""
return PIPELINE_REGISTRY.get_supported_tasks()
def get_task(model: str, token: Optional[str] = None, **deprecated_kwargs) -> str:
"""
Returns the task string for the given model id, resolving it from the Hub; supports the deprecated `use_auth_token` argument.
"""
use_auth_token = deprecated_kwargs.pop("use_auth_token", None)
if use_auth_token is not None:
warnings.warn(
"The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.",
FutureWarning,
)
if token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
token = use_auth_token
if is_offline_mode():
raise RuntimeError("You cannot infer task automatically within `pipeline` when using offline mode")
try:
info = model_info(model, token=token)
except Exception as e:
raise RuntimeError(f"Instantiating a pipeline without a task set raised an error: {e}")
if not info.pipeline_tag:
raise RuntimeError(
f"The model {model} does not seem to have a correct `pipeline_tag` set to infer the task automatically"
)
if getattr(info, "library_name", "transformers") != "transformers":
raise RuntimeError(f"This model is meant to be used with {info.library_name} not with transformers")
task = info.pipeline_tag
return task
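A sketch of `get_task` in action; it needs network access because it calls `huggingface_hub.model_info` and reads the repo's `pipeline_tag`:
```
from transformers.pipelines import get_task

# The returned string depends on the pipeline_tag set on the Hub repo.
print(get_task("facebook/bart-large-mnli"))  # e.g. "zero-shot-classification"
```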
def check_task(task: str) -> Tuple[str, Dict, Any]:
"""
Checks an incoming task string, validates it, and returns the default pipeline and model classes, and the
default models if they exist.
Args:
task (`str`):
The task defining which pipeline will be returned. Currently accepted tasks are:
- `"audio-classification"`
- `"automatic-speech-recognition"`
- `"conversational"`
- `"depth-estimation"`
- `"document-question-answering"`
- `"feature-extraction"`
- `"fill-mask"`
- `"image-classification"`
- `"image-feature-extraction"`
- `"image-segmentation"`
- `"image-to-text"`
- `"image-to-image"`
- `"object-detection"`
- `"question-answering"`
- `"summarization"`
- `"table-question-answering"`
- `"text2text-generation"`
- `"text-classification"`(别名为 `"sentiment-analysis"` 可用)
- `"text-generation"`
- `"text-to-audio"`(别名为 `"text-to-speech"` 可用)
- `"token-classification"`(别名为 `"ner"` 可用)
- `"translation"`
- `"translation_xx_to_yy"`
- `"video-classification"`
- `"visual-question-answering"`(别名为 `"vqa"` 可用)
- `"zero-shot-classification"`
- `"zero-shot-image-classification"`
- `"zero-shot-object-detection"`
Returns:
Returns a tuple of the normalized task name `normalized_task` (with aliases and options removed), the task's default settings dictionary `task_defaults`, and some extra task options `task_options` (for parametrized tasks like "translation_XX_to_YY").
"""
return PIPELINE_REGISTRY.check_task(task)
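A sketch of what `check_task` returns for a parametrized translation task (the actual return values come from `PipelineRegistry.check_task`):
```
from transformers.pipelines import check_task

normalized_task, targeted_task, task_options = check_task("translation_en_to_fr")
print(normalized_task)                 # "translation"
print(task_options)                    # ("en", "fr")
print(targeted_task["impl"].__name__)  # "TranslationPipeline"
```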
def clean_custom_task(task_info):
import transformers  # import the transformers library
# If the task info does not describe an implementation, raise a RuntimeError
if "impl" not in task_info:
raise RuntimeError("This model introduces a custom pipeline without specifying its implementation.")
pt_class_names = task_info.get("pt", ())  # get pt_class_names, defaulting to an empty tuple
if isinstance(pt_class_names, str):
pt_class_names = [pt_class_names]  # a single string is normalized to a one-element list
# Store in task_info["pt"] the class object matching each name in pt_class_names
task_info["pt"] = tuple(getattr(transformers, c) for c in pt_class_names)
tf_class_names = task_info.get("tf", ())  # get tf_class_names, defaulting to an empty tuple
if isinstance(tf_class_names, str):
tf_class_names = [tf_class_names]  # a single string is normalized to a one-element list
# Store in task_info["tf"] the class object matching each name in tf_class_names
task_info["tf"] = tuple(getattr(transformers, c) for c in tf_class_names)
return task_info, None  # return the updated task_info and None as the second value
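A hypothetical custom-task entry, as it might appear in `config.custom_pipelines`, run through `clean_custom_task`; class names given as strings are resolved to the real classes on the `transformers` module:
```
task_info = {
    "impl": "MyCustomPipeline",                  # remote class name, resolved later via the dynamic module utils
    "pt": "AutoModelForSequenceClassification",  # a string is normalized to a one-element tuple of classes
    "tf": (),
}
cleaned, _ = clean_custom_task(task_info)
print(cleaned["pt"])  # (<class 'transformers...AutoModelForSequenceClassification'>,)
```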
def pipeline(
task: str = None,
model: Optional[Union[str, "PreTrainedModel", "TFPreTrainedModel"]] = None,
config: Optional[Union[str, PretrainedConfig]] = None,
tokenizer: Optional[Union[str, PreTrainedTokenizer, "PreTrainedTokenizerFast"]] = None,
feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None,
image_processor: Optional[Union[str, BaseImageProcessor]] = None,
framework: Optional[str] = None,
revision: Optional[str] = None,
use_fast: bool = True,
token: Optional[Union[str, bool]] = None,
device: Optional[Union[int, str, "torch.device"]] = None,
device_map=None,
torch_dtype=None,
trust_remote_code: Optional[bool] = None,
model_kwargs: Dict[str, Any] = None,
pipeline_class: Optional[Any] = None,
**kwargs,
) -> Pipeline:
"""
Utility factory method to build a [`Pipeline`].
Pipelines are made of:
- A [tokenizer](tokenizer) in charge of mapping raw textual input to token.
- A [model](model) to make predictions from the inputs.
- Some (optional) post processing for enhancing model's output.
Returns:
[`Pipeline`]: A suitable pipeline for the task.
Examples:
```
>>> from transformers import pipeline, AutoModelForTokenClassification, AutoTokenizer
>>> # Sentiment analysis pipeline
>>> analyzer = pipeline("sentiment-analysis")
>>> # Question answering pipeline, specifying the checkpoint identifier
>>> oracle = pipeline(
... "question-answering", model="distilbert/distilbert-base-cased-distilled-squad", tokenizer="google-bert/bert-base-cased"
... )
>>> # Named entity recognition pipeline, passing in a specific model and tokenizer
>>> model = AutoModelForTokenClassification.from_pretrained("dbmdz/bert-large-cased-finetuned-conll03-english")
>>> tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-cased")
>>> recognizer = pipeline("ner", model=model, tokenizer=tokenizer)
```"""
if model_kwargs is None:
model_kwargs = {}
# Make sure we only pass use_auth_token once as a keyword argument (it used to be possible to pass it through model_kwargs, kept for backward compatibility)
use_auth_token = model_kwargs.pop("use_auth_token", None)
# If use_auth_token is not None, warn that it is deprecated and will be removed in Transformers v5; `token` should be used instead
if use_auth_token is not None:
warnings.warn(
"The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.",
FutureWarning,
)
# If token is also set, raise a ValueError: `token` and `use_auth_token` cannot both be specified
if token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
# Forward the value of use_auth_token to token
token = use_auth_token
# Pop code_revision and _commit_hash from the kwargs dict
code_revision = kwargs.pop("code_revision", None)
commit_hash = kwargs.pop("_commit_hash", None)
# Build the hub_kwargs dict holding the revision, token, trust_remote_code and _commit_hash values
hub_kwargs = {
"revision": revision,
"token": token,
"trust_remote_code": trust_remote_code,
"_commit_hash": commit_hash,
}
# If neither a task nor a model is specified, raise a RuntimeError since the pipeline cannot be instantiated
if task is None and model is None:
raise RuntimeError(
"Impossible to instantiate a pipeline without either a task or a model "
"being specified. "
"Please provide a task class or a model"
)
# If a tokenizer is specified without a model, raise a RuntimeError since the pipeline cannot be instantiated
if model is None and tokenizer is not None:
raise RuntimeError(
"Impossible to instantiate a pipeline with tokenizer specified but not the model as the provided tokenizer"
" may not be compatible with the default model. Please provide a PreTrainedModel class or a"
" path/identifier to a pretrained model when providing tokenizer."
)
# If a feature_extractor is specified without a model, raise a RuntimeError since the pipeline cannot be instantiated
if model is None and feature_extractor is not None:
raise RuntimeError(
"Impossible to instantiate a pipeline with feature_extractor specified but not the model as the provided"
" feature_extractor may not be compatible with the default model. Please provide a PreTrainedModel class"
" or a path/identifier to a pretrained model when providing feature_extractor."
)
# If model is a Path object, convert it to a string
if isinstance(model, Path):
model = str(model)
# If no commit hash was passed in
if commit_hash is None:
# Start with no pretrained model name or path
pretrained_model_name_or_path = None
# If config is a string, use it as the pretrained model name or path
if isinstance(config, str):
pretrained_model_name_or_path = config
# If config is None and model is a string, use the model identifier instead
elif config is None and isinstance(model, str):
pretrained_model_name_or_path = model
# If config is not a PretrainedConfig instance and we have a name or path to resolve
if not isinstance(config, PretrainedConfig) and pretrained_model_name_or_path is not None:
# First fetch the config file (which may not exist) to obtain the commit hash
resolved_config_file = cached_file(
pretrained_model_name_or_path,
CONFIG_NAME,
_raise_exceptions_for_gated_repo=False,
_raise_exceptions_for_missing_entries=False,
_raise_exceptions_for_connection_errors=False,
**hub_kwargs,
)
# Extract the commit hash from the resolved config file and update _commit_hash in hub_kwargs
hub_kwargs["_commit_hash"] = extract_commit_hash(resolved_config_file, commit_hash)
else:
# Otherwise, read the _commit_hash attribute from the config object and update hub_kwargs with it
hub_kwargs["_commit_hash"] = getattr(config, "_commit_hash", None)
# The config is the primordial piece of information.
# Instantiate it if needed:
# if config is a string, build the config object from the pretrained model configuration
if isinstance(config, str):
config = AutoConfig.from_pretrained(
config, _from_pipeline=task, code_revision=code_revision, **hub_kwargs, **model_kwargs
)
# Update _commit_hash in hub_kwargs
hub_kwargs["_commit_hash"] = config._commit_hash
# If config is None and the model path is a string
elif config is None and isinstance(model, str):
# If PEFT is available, check whether the model path contains an adapter file
if is_peft_available():
# Look for an adapter config file in the model path, excluding the `trust_remote_code` argument
_hub_kwargs = {k: v for k, v in hub_kwargs.items() if k != "trust_remote_code"}
maybe_adapter_path = find_adapter_config_file(
model,
token=hub_kwargs["token"],
revision=hub_kwargs["revision"],
_commit_hash=hub_kwargs["_commit_hash"],
)
# If an adapter path is found, read the base model name or path from the adapter config file
if maybe_adapter_path is not None:
with open(maybe_adapter_path, "r", encoding="utf-8") as f:
adapter_config = json.load(f)
model = adapter_config["base_model_name_or_path"]
# Build the auto config from the (possibly resolved base) model path
config = AutoConfig.from_pretrained(
model, _from_pipeline=task, code_revision=code_revision, **hub_kwargs, **model_kwargs
)
# Update _commit_hash in hub_kwargs
hub_kwargs["_commit_hash"] = config._commit_hash
# The custom task dictionary starts out empty
custom_tasks = {}
# If the config exposes custom pipelines, collect them as custom tasks
if config is not None and len(getattr(config, "custom_pipelines", {})) > 0:
custom_tasks = config.custom_pipelines
# If no task was given and remote code is not explicitly disabled, try to infer the task automatically
if task is None and trust_remote_code is not False:
# With a single custom task, select it automatically
if len(custom_tasks) == 1:
task = list(custom_tasks.keys())[0]
else:
# With several custom tasks, raise a RuntimeError and ask the user to pick one explicitly
raise RuntimeError(
"We can't infer the task automatically for this model as there are multiple tasks available. Pick "
f"one in {', '.join(custom_tasks.keys())}"
)
# If the task is still None but a model was given, try to fetch the task from the Hub
if task is None and model is not None:
# If the model is not a string, raise a RuntimeError
if not isinstance(model, str):
raise RuntimeError(
"Inferring the task automatically requires to check the hub with a model_id defined as a `str`. "
f"{model} is not a valid model_id."
)
# Query the Hub for the task associated with the model id and token
task = get_task(model, token)
# Dispatch on the resolved task
if task in custom_tasks:
# Keep the task name as the normalized task
normalized_task = task
# Clean up the custom task definition to get the targeted task and the task options
targeted_task, task_options = clean_custom_task(custom_tasks[task])
# If no pipeline class was given, it has to come from the remote code
if pipeline_class is None:
# Without trust_remote_code=True, refuse to execute code fetched from the Hub
if not trust_remote_code:
raise ValueError(
"Loading this pipeline requires you to execute the code in the pipeline file in that"
" repo on your local machine. Make sure you have read the code there to avoid malicious use, then"
" set the option `trust_remote_code=True` to remove this error."
)
# Fetch the class reference from the dynamic module
class_ref = targeted_task["impl"]
pipeline_class = get_class_from_dynamic_module(
class_ref,
model,
code_revision=code_revision,
**hub_kwargs,
)
else:
# Validate the task and return the normalized task, the targeted task and the task options
normalized_task, targeted_task, task_options = check_task(task)
# If no pipeline class was given, default to the implementation class of the targeted task
if pipeline_class is None:
pipeline_class = targeted_task["impl"]
# If no model was provided, fall back to the task's default model, config and tokenizer
if model is None:
# Get the default model and its revision for the task
model, default_revision = get_default_model_and_revision(targeted_task, framework, task_options)
# Use the default revision unless one was explicitly requested
revision = revision if revision is not None else default_revision
# Warn that no model was supplied and that a default model and revision are being used
logger.warning(
f"No model was supplied, defaulted to {model} and revision"
f" {revision} ({HUGGINGFACE_CO_RESOLVE_ENDPOINT}/{model}).\n"
"Using a pipeline without specifying a model name and revision in production is not recommended."
)
# If no config was provided and the model name is a string, build the config from the pretrained model
if config is None and isinstance(model, str):
config = AutoConfig.from_pretrained(model, _from_pipeline=task, **hub_kwargs, **model_kwargs)
# Record the config commit hash in hub_kwargs
hub_kwargs["_commit_hash"] = config._commit_hash
# Handle device_map if it was provided
if device_map is not None:
# If model_kwargs already contains a device_map, raise an error
if "device_map" in model_kwargs:
raise ValueError(
'You cannot use both `pipeline(... device_map=..., model_kwargs={"device_map":...})` as those'
" arguments might conflict, use only one."
)
# Warn when both device and device_map are specified
if device is not None:
logger.warning(
"Both `device` and `device_map` are specified. `device` will override `device_map`. You"
" will most likely encounter unexpected behavior. Please remove `device` and keep `device_map`."
)
# Add device_map to the model kwargs
model_kwargs["device_map"] = device_map
# Handle torch_dtype if it was provided
if torch_dtype is not None:
# If model_kwargs already contains a torch_dtype, raise an error
if "torch_dtype" in model_kwargs:
raise ValueError(
'You cannot use both `pipeline(... torch_dtype=..., model_kwargs={"torch_dtype":...})` as those'
" arguments might conflict, use only one."
)
# If torch_dtype is a string naming an attribute of torch, convert it to the actual torch dtype
if isinstance(torch_dtype, str) and hasattr(torch, torch_dtype):
torch_dtype = getattr(torch, torch_dtype)
# Add torch_dtype to the model kwargs
model_kwargs["torch_dtype"] = torch_dtype
# Keep the original string identifier around; it is used further down to infer tokenizer / processor names
model_name = model if isinstance(model, str) else None
# If the model is a string (or the framework is still unknown), infer the framework and load the model
if isinstance(model, str) or framework is None:
# Select the candidate model classes (TensorFlow or PyTorch) and load the framework and model accordingly
model_classes = {"tf": targeted_task["tf"], "pt": targeted_task["pt"]}
framework, model = infer_framework_load_model(
model,
model_classes=model_classes,
config=config,
framework=framework,
task=task,
**hub_kwargs,
**model_kwargs,
)
# Grab the model config
model_config = model.config
# Record the config commit hash in hub_kwargs
hub_kwargs["_commit_hash"] = model.config._commit_hash
# Decide whether a tokenizer needs to be loaded
load_tokenizer = type(model_config) in TOKENIZER_MAPPING or model_config.tokenizer_class is not None
# Decide whether a feature extractor needs to be loaded
load_feature_extractor = type(model_config) in FEATURE_EXTRACTOR_MAPPING or feature_extractor is not None
# Decide whether an image processor needs to be loaded: the model config is in the image processor mapping or an image processor was passed
load_image_processor = type(model_config) in IMAGE_PROCESSOR_MAPPING or image_processor is not None
# If a `model` instance (a `PretrainedModel`, not a string) is passed with only one of `image_processor` or
# `feature_extractor`, loading can fail. This notably happens for some vision tasks when calling `pipeline()`
# with a `model` and one of `image_processor` / `feature_extractor`.
# TODO: make `NO_IMAGE_PROCESSOR_TASKS` and `NO_FEATURE_EXTRACTOR_TASKS` more robust to avoid this issue.
# This block is only a temporary workaround to keep CI green.
if load_image_processor and load_feature_extractor:
load_feature_extractor = False
# If no tokenizer was passed, the tokenizer is not flagged for loading, the normalized task is not in
# NO_TOKENIZER_TASKS, and the model config class name is in MULTI_MODEL_AUDIO_CONFIGS or
# MULTI_MODEL_VISION_CONFIGS, then force-load the tokenizer.
if (
tokenizer is None
and not load_tokenizer
and normalized_task not in NO_TOKENIZER_TASKS
# Use the class name to avoid importing the real class.
and (
model_config.__class__.__name__ in MULTI_MODEL_AUDIO_CONFIGS
or model_config.__class__.__name__ in MULTI_MODEL_VISION_CONFIGS
)
):
load_tokenizer = True
# If no image processor was passed, the image processor is not flagged for loading, the normalized task is
# not in NO_IMAGE_PROCESSOR_TASKS, and the model config class name is in MULTI_MODEL_VISION_CONFIGS,
# then force-load the image processor.
if (
image_processor is None
and not load_image_processor
and normalized_task not in NO_IMAGE_PROCESSOR_TASKS
# Use the class name to avoid importing the real class.
and model_config.__class__.__name__ in MULTI_MODEL_VISION_CONFIGS
):
load_image_processor = True
# If no feature extractor was passed, the feature extractor is not flagged for loading, the normalized task
# is not in NO_FEATURE_EXTRACTOR_TASKS, and the model config class name is in MULTI_MODEL_AUDIO_CONFIGS,
# then force-load the feature extractor.
if (
feature_extractor is None
and not load_feature_extractor
and normalized_task not in NO_FEATURE_EXTRACTOR_TASKS
# Use the class name to avoid importing the real class.
and model_config.__class__.__name__ in MULTI_MODEL_AUDIO_CONFIGS
):
load_feature_extractor = True
# Tasks in NO_TOKENIZER_TASKS never need a tokenizer.
if task in NO_TOKENIZER_TASKS:
load_tokenizer = False
# Tasks in NO_FEATURE_EXTRACTOR_TASKS never need a feature extractor.
if task in NO_FEATURE_EXTRACTOR_TASKS:
load_feature_extractor = False
# Tasks in NO_IMAGE_PROCESSOR_TASKS never need an image processor.
if task in NO_IMAGE_PROCESSOR_TASKS:
load_image_processor = False
# If a tokenizer has to be loaded
if load_tokenizer:
# Try to infer the tokenizer from the model or config name (when provided)
if tokenizer is None:
# If model_name is a string, use it to resolve the tokenizer
if isinstance(model_name, str):
tokenizer = model_name
# If config is a string, use it to resolve the tokenizer
elif isinstance(config, str):
tokenizer = config
else:
# Impossible to guess which tokenizer to use here
raise Exception(
"Impossible to guess which tokenizer to use. "
"Please provide a PreTrainedTokenizer class or a path/identifier to a pretrained tokenizer."
)
# Instantiate the tokenizer if needed
if isinstance(tokenizer, (str, tuple)):
if isinstance(tokenizer, tuple):
# For a tuple, the format is (tokenizer name, {kwargs})
use_fast = tokenizer[1].pop("use_fast", use_fast)
tokenizer_identifier = tokenizer[0]
tokenizer_kwargs = tokenizer[1]
else:
tokenizer_identifier = tokenizer
tokenizer_kwargs = model_kwargs.copy()
tokenizer_kwargs.pop("torch_dtype", None)
# Create the AutoTokenizer instance from the given arguments
tokenizer = AutoTokenizer.from_pretrained(
tokenizer_identifier, use_fast=use_fast, _from_pipeline=task, **hub_kwargs, **tokenizer_kwargs
)
# If an image processor has to be loaded
if load_image_processor:
# Try to infer the image processor from the model or config name (when provided)
if image_processor is None:
# If model_name is a string, use it to resolve the image processor
if isinstance(model_name, str):
image_processor = model_name
# If config is a string, use it to resolve the image processor
elif isinstance(config, str):
image_processor = config
# For backward compatibility, reuse feature_extractor as the image processor when it is a BaseImageProcessor instance
elif feature_extractor is not None and isinstance(feature_extractor, BaseImageProcessor):
image_processor = feature_extractor
else:
# Impossible to guess which image processor to use here
raise Exception(
"Impossible to guess which image processor to use. "
"Please provide a PreTrainedImageProcessor class or a path/identifier "
"to a pretrained image processor."
)
# Instantiate the image processor if needed
if isinstance(image_processor, (str, tuple)):
# Create the AutoImageProcessor instance from the given arguments
image_processor = AutoImageProcessor.from_pretrained(
image_processor, _from_pipeline=task, **hub_kwargs, **model_kwargs
)
# If a feature extractor has to be loaded
if load_feature_extractor:
# Try to infer the feature extractor from the model or config name (when it is a string)
if feature_extractor is None:
# If model_name is a string, use it as the feature extractor
if isinstance(model_name, str):
feature_extractor = model_name
# If config is a string, use it as the feature extractor
elif isinstance(config, str):
feature_extractor = config
# Impossible to guess the right feature extractor here
# 在此无法猜测正确的特征提取器
raise Exception(
"Impossible to guess which feature extractor to use. "
"Please provide a PreTrainedFeatureExtractor class or a path/identifier "
"to a pretrained feature extractor."
)
# If the feature extractor is a string or a tuple, instantiate it
if isinstance(feature_extractor, (str, tuple)):
feature_extractor = AutoFeatureExtractor.from_pretrained(
feature_extractor, _from_pipeline=task, **hub_kwargs, **model_kwargs
)
# If the feature extractor is backed by a language model and the model name is a string
if (
feature_extractor._processor_class
and feature_extractor._processor_class.endswith("WithLM")
and isinstance(model_name, str)
):
try:
import kenlm  # trigger `ImportError` if not installed
from pyctcdecode import BeamSearchDecoderCTC
# If the model name is a local directory or file
if os.path.isdir(model_name) or os.path.isfile(model_name):
decoder = BeamSearchDecoderCTC.load_from_dir(model_name)
else:
# Glob pattern for the serialized language model files and the alphabet filename
language_model_glob = os.path.join(
BeamSearchDecoderCTC._LANGUAGE_MODEL_SERIALIZED_DIRECTORY, "*"
)
alphabet_filename = BeamSearchDecoderCTC._ALPHABET_SERIALIZED_FILENAME
allow_patterns = [language_model_glob, alphabet_filename]
# Load the decoder for this model name from the HF Hub
decoder = BeamSearchDecoderCTC.load_from_hf_hub(model_name, allow_patterns=allow_patterns)
# Pass the decoder along in the pipeline kwargs
kwargs["decoder"] = decoder
except ImportError as e:
# If the `decoder` cannot be loaded, log a warning and default to raw CTC
logger.warning(f"Could not load the `decoder` for {model_name}. Defaulting to raw CTC. Error: {e}")
# If kenlm is not installed
if not is_kenlm_available():
logger.warning("Try to install `kenlm`: `pip install kenlm`")
# If pyctcdecode is not installed
if not is_pyctcdecode_available():
logger.warning("Try to install `pyctcdecode`: `pip install pyctcdecode`")
# If the task is translation and the model config defines task-specific params
if task == "translation" and model.config.task_specific_params:
# Iterate over the model config's task-specific params
for key in model.config.task_specific_params:
# If a param key starts with "translation"
if key.startswith("translation"):
# Switch the task to that key and emit a warning
task = key
warnings.warn(
f'"translation" task was used, instead of "translation_XX_to_YY", defaulting to "{task}"',
UserWarning,
)
break
# If a tokenizer is available, add it to the kwargs
if tokenizer is not None:
kwargs["tokenizer"] = tokenizer
# If a feature extractor was provided, add it to the kwargs
if feature_extractor is not None:
kwargs["feature_extractor"] = feature_extractor
# If a torch dtype was provided, add it to the kwargs
if torch_dtype is not None:
kwargs["torch_dtype"] = torch_dtype
# If an image processor was provided, add it to the kwargs
if image_processor is not None:
kwargs["image_processor"] = image_processor
# If a device was provided, add it to the kwargs
if device is not None:
kwargs["device"] = device
# Build and return the pipeline_class instance from the given arguments and collected kwargs
return pipeline_class(model=model, framework=framework, task=task, **kwargs)
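Tying the factory together, a minimal sketch of a call that exercises the branches above (string `torch_dtype` conversion and the `device` kwarg); it assumes a CUDA device is available, and the checkpoint id is only illustrative:
```
from transformers import pipeline

generator = pipeline(
    "text-generation",
    model="openai-community/gpt2",
    torch_dtype="float16",  # converted to torch.float16 and forwarded through model_kwargs
    device=0,               # forwarded to the pipeline class via kwargs["device"]
)
print(generator("Hello, my name is", max_new_tokens=5)[0]["generated_text"])
```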