背景
目前在我们的业务中,运行一个边缘计算算法的时候,会以一个workflow的形式进行运行.也就是有向无环图DAG.
在DAG当中,每个节点代表了评估、识别、合并的算法插件,需要将上一个算法的输出结果作为输入传递给下一个算法.
这在边缘端资源受限的情况下,是需要对资源做控制的。所以我们第一个方案考虑的是基于Openfaas做改造. 那在此之前,我们需要把DAG转成一个单向链表进行处理. 所以我们分为三步走
- 把DAG转换成单向链表
- OpenFaas实现kafka数据源
- OpenFaas支持有状态计算
把DAG转换成单向链表
package main
import (
"encoding/json"
"errors"
"fmt"
"os"
)
// Vertex is a node in the graph; after conversion it also serves as a
// singly-linked-list element via Next.
type Vertex struct {
	ID   int       // unique vertex identifier
	Next *Vertex   // next element once the DAG has been flattened into a list
	Data *NodeData // payload parsed from the workflow JSON
}
// Edge is a directed, weighted connection between two vertices.
type Edge struct {
	ID     int // unique edge identifier
	From   int // source vertex ID
	To     int // destination vertex ID
	Weight int // edge weight
}
// Graph is a directed acyclic graph (DAG).
type Graph struct {
	Vertices map[int]*Vertex       // vertex ID -> vertex
	Edges    map[int]*Edge         // edge ID -> edge
	AdjList  map[int]map[int]*Edge // from ID -> (to ID -> edge)
}
// NodeData holds one node's information parsed from the workflow JSON.
type NodeData struct {
	ID         string      `json:"id"`
	Label      string      `json:"label"`
	IconType   string      `json:"iconType"`
	ClassName  string      `json:"className"`
	Top        string      `json:"top"`  // editor UI position (vertical)
	Left       string      `json:"left"` // editor UI position (horizontal)
	FlinkParam FlinkParam  `json:"flink_param"`
	Endpoints  []Endpoints `json:"endpoints"`
}
// FlinkParam bundles the Flink and plugin configuration of a node.
type FlinkParam struct {
	Flink  Flink  `json:"flink"`
	Plugin Plugin `json:"plugin"`
	Type   string `json:"type"`
}
// Flink describes the Flink-side parameters of a workflow node.
type Flink struct {
	Watermark           Watermark         `json:"watermark"`
	Param               map[string]string `json:"param"`
	SubscribeOutputTags []string          `json:"subscribe_output_tags"` // upstream output tags this node consumes
	Parallelism         int               `json:"parallelism"`
	KeyClassName        string            `json:"key_class_name"`
	OutputTagsRule      []OutputTagsRule  `json:"output_tags_rule"` // output tags this node produces
	Type                string            `json:"type"`
	ClassName           string            `json:"class_name"`
}
// Watermark configures event-time watermarking for a node.
type Watermark struct {
	Type      string `json:"type"`
	TimeField string `json:"time_field"` // name of the event-time field
	Duration  int    `json:"duration"`
}
// OutputTagsRule names an output tag and the rule that routes records to it.
type OutputTagsRule struct {
	Name string `json:"name"`
	Rule string `json:"rule"`
}
// Plugin describes the algorithm plugin attached to a node.
type Plugin struct {
	Param     map[string]string `json:"param"`
	Type      string            `json:"type"`
	ClassName string            `json:"class_name"`
}
// Endpoints describes one connection point of a node in the editor UI.
type Endpoints struct {
	ID          string `json:"id"`
	Orientation []int  `json:"orientation"`
	Pos         []int  `json:"pos"`
}
// JSONData is the top-level JSON document: the array of workflow nodes.
type JSONData struct {
	Nodes []NodeData `json:"nodes"`
}
// NewGraph returns an empty graph with all internal maps initialized.
func NewGraph() *Graph {
	g := &Graph{}
	g.Vertices = make(map[int]*Vertex)
	g.Edges = make(map[int]*Edge)
	g.AdjList = make(map[int]map[int]*Edge)
	return g
}
// AddVertex registers a new vertex; it fails if the ID is already taken.
func (g *Graph) AddVertex(id int, data *NodeData) error {
	_, dup := g.Vertices[id]
	if dup {
		return errors.New("vertex already exists")
	}
	v := &Vertex{ID: id, Data: data}
	g.Vertices[id] = v
	g.AdjList[id] = map[int]*Edge{}
	return nil
}
// AddEdge inserts a directed edge from -> to. It rejects edges whose
// endpoints are unknown, duplicate edge IDs, and edges that would make
// the graph cyclic.
func (g *Graph) AddEdge(id, from, to, weight int) error {
	if _, ok := g.Vertices[from]; !ok {
		return errors.New("source vertex does not exist")
	}
	if _, ok := g.Vertices[to]; !ok {
		return errors.New("target vertex does not exist")
	}
	if _, dup := g.Edges[id]; dup {
		return errors.New("edge already exists")
	}
	if g.hasCycle(from, to) {
		return errors.New("adding this edge would create a cycle")
	}
	e := &Edge{ID: id, From: from, To: to, Weight: weight}
	g.Edges[id] = e
	g.AdjList[from][to] = e
	return nil
}
// hasCycle reports whether inserting the edge from -> to would create a
// cycle, i.e. whether "from" is already reachable from "to" through the
// existing edges (a self-loop, from == to, also counts).
//
// Fix: the previous implementation ran plain cycle detection over the
// existing graph starting at "to". Since the graph is kept acyclic, that
// could never return true, so cycle-creating edges were silently accepted.
func (g *Graph) hasCycle(from, to int) bool {
	if from == to {
		return true
	}
	visited := make(map[int]bool)
	stack := []int{to}
	for len(stack) > 0 {
		n := stack[len(stack)-1]
		stack = stack[:len(stack)-1]
		if n == from {
			return true
		}
		if visited[n] {
			continue
		}
		visited[n] = true
		for neighbor := range g.AdjList[n] {
			stack = append(stack, neighbor)
		}
	}
	return false
}
// detectCycle performs a DFS from node, reporting true if it re-enters a
// node that is still on the current recursion stack (i.e. the existing
// graph contains a cycle reachable from node).
func (g *Graph) detectCycle(node int, visited, stack map[int]bool) bool {
	switch {
	case stack[node]:
		return true
	case visited[node]:
		return false
	}
	visited[node], stack[node] = true, true
	for next := range g.AdjList[node] {
		if g.detectCycle(next, visited, stack) {
			return true
		}
	}
	stack[node] = false
	return false
}
// BuildGraphFromJSON reads a workflow JSON file and builds the DAG:
// every node becomes a vertex, and an edge is added whenever one node's
// output tag name appears in another node's subscribed tags.
func BuildGraphFromJSON(filePath string) (*Graph, error) {
	data, err := os.ReadFile(filePath)
	if err != nil {
		return nil, err
	}
	var jsonData JSONData
	if err := json.Unmarshal(data, &jsonData); err != nil {
		return nil, err
	}
	graph := NewGraph()
	// Parse each node ID once up front.
	//
	// Fixes: the original called fmt.Sscanf(node.ID, "%d") with no
	// destination argument, which returns the number of scanned items
	// (always 0), so every vertex ended up with ID 0. It also took the
	// address of the range loop variable, making all vertices share a
	// single NodeData. Errors from AddVertex were silently dropped.
	ids := make(map[string]int, len(jsonData.Nodes))
	for i := range jsonData.Nodes {
		node := &jsonData.Nodes[i]
		var id int
		if _, err := fmt.Sscanf(node.ID, "%d", &id); err != nil {
			return nil, fmt.Errorf("invalid node id %q: %w", node.ID, err)
		}
		ids[node.ID] = id
		if err := graph.AddVertex(id, node); err != nil {
			return nil, err
		}
	}
	// Connect producers to consumers via matching tag names.
	for _, fromNode := range jsonData.Nodes {
		for _, toNode := range jsonData.Nodes {
			for _, outputTag := range fromNode.FlinkParam.Flink.OutputTagsRule {
				for _, inputTag := range toNode.FlinkParam.Flink.SubscribeOutputTags {
					if outputTag.Name == inputTag {
						// Edge errors (duplicates, cycles) are deliberately
						// non-fatal, matching the original best-effort behavior.
						graph.AddEdge(len(graph.Edges)+1, ids[fromNode.ID], ids[toNode.ID], 1)
					}
				}
			}
		}
	}
	return graph, nil
}
// TopologicalSort returns the vertex IDs in topological order using
// Kahn's algorithm, or an error if the graph contains a cycle.
func (g *Graph) TopologicalSort() ([]int, error) {
	// Compute the in-degree of every vertex.
	inDegree := make(map[int]int, len(g.Vertices))
	for id := range g.Vertices {
		inDegree[id] = 0
	}
	for _, e := range g.Edges {
		inDegree[e.To]++
	}
	// Seed the queue with all roots (in-degree zero).
	var queue []int
	for id, d := range inDegree {
		if d == 0 {
			queue = append(queue, id)
		}
	}
	// Repeatedly remove a root and release its successors.
	order := make([]int, 0, len(g.Vertices))
	for len(queue) > 0 {
		cur := queue[0]
		queue = queue[1:]
		order = append(order, cur)
		for next := range g.AdjList[cur] {
			inDegree[next]--
			if inDegree[next] == 0 {
				queue = append(queue, next)
			}
		}
	}
	// Any vertex left unprocessed proves a cycle.
	if len(order) != len(g.Vertices) {
		return nil, errors.New("graph has a cycle")
	}
	return order, nil
}
// ToLinkedList flattens the DAG into a singly linked list of vertices
// following a topological order and returns the head of the list.
//
// Fix: the original did not compile — it returned the []int sort result
// where (*Vertex, error) was expected, returned a bare nil for a
// two-value signature, and never actually linked the vertices.
func (g *Graph) ToLinkedList() (*Vertex, error) {
	sorted, err := g.TopologicalSort()
	if err != nil {
		return nil, err
	}
	var head, tail *Vertex
	for _, id := range sorted {
		v := g.Vertices[id]
		v.Next = nil // reset any stale link from a previous conversion
		if head == nil {
			head = v
		} else {
			tail.Next = v
		}
		tail = v
	}
	return head, nil
}
OpenFaas实现kafka数据源
基于OpenFaaS现有的数据源架构,创建一个Kafka数据源实现,遵循与现有数据源相同的接口模式。
OpenFaas现有架构
sequenceDiagram
participant Client as "客户端"
participant Gateway as "OpenFaaS Gateway"
participant Provider as "Function Provider"
participant Function as "Function实例"
participant NATS as "NATS队列"
participant Prometheus as "Prometheus"
participant UI as "Web UI"
Note over Client,UI: 1. 同步函数调用流程
Client->>Gateway: HTTP请求 /function/{name}
Gateway->>Provider: 转发请求到Provider
Provider->>Function: 路由到函数实例
Function->>Provider: 返回响应
Provider->>Gateway: 返回响应
Gateway->>Client: 返回最终响应
Gateway->>Prometheus: 发送指标数据
Note over Client,UI: 2. 异步函数调用流程
Client->>Gateway: HTTP请求 /async-function/{name}
Gateway->>NATS: 将请求加入队列
Gateway->>Client: 返回202 Accepted
NATS->>Provider: Queue Worker处理队列请求
Provider->>Function: 执行函数
Function->>Provider: 返回结果
opt 如果有回调URL
Provider->>Client: 回调结果
end
Note over Client,UI: 3. 函数管理流程
Client->>Gateway: 部署函数 POST /system/functions
Gateway->>Provider: 转发部署请求
Provider->>Provider: 创建函数实例
Provider->>Gateway: 返回部署状态
Gateway->>Client: 返回结果
Note over Client,UI: 4. 监控和扩缩容
Gateway->>Prometheus: 持续发送指标
Provider->>Prometheus: 发送函数指标
Gateway->>Provider: 根据负载进行扩缩容
Provider->>Function: 创建/销毁实例
Note over Client,UI: 5. Web UI交互
UI->>Gateway: 获取函数列表 /system/functions
Gateway->>Provider: 转发请求
Provider->>Gateway: 返回函数信息
Gateway->>UI: 返回函数列表
1. Kafka数据源接口定义
首先,我们需要定义Kafka数据源的接口,类似于现有的 service_query.go:8-12 :
// gateway/kafka/kafka_source.go
package kafka
import (
"context"
"log"
"time"
"github.com/Shopify/sarama"
"github.com/openfaas/faas/gateway/scaling"
)
// KafkaDataSource implements a Kafka-based data source for OpenFaaS.
// It wraps a sarama consumer and fans out one goroutine per partition.
type KafkaDataSource struct {
	consumer sarama.Consumer
	brokers  []string
	topics   []string
	// NOTE(review): groupID is stored but sarama.NewConsumer does not use
	// consumer groups — a real group would need sarama.NewConsumerGroup.
	groupID     string
	credentials *KafkaCredentials
}
// KafkaCredentials holds optional SASL/PLAIN authentication settings.
type KafkaCredentials struct {
	Username string
	Password string
	UseSASL  bool // enable SASL/PLAIN when true
}
// KafkaSourceConfig is the configuration used to build a KafkaDataSource.
type KafkaSourceConfig struct {
	Brokers     []string
	Topics      []string
	GroupID     string
	Credentials *KafkaCredentials // nil disables authentication
}
2. Kafka数据源核心实现
// NewKafkaDataSource builds a Kafka consumer from the given config,
// optionally enabling SASL/PLAIN authentication.
func NewKafkaDataSource(config KafkaSourceConfig) (*KafkaDataSource, error) {
	cfg := sarama.NewConfig()
	cfg.Consumer.Group.Rebalance.Strategy = sarama.BalanceStrategyRoundRobin
	cfg.Consumer.Offsets.Initial = sarama.OffsetNewest
	if creds := config.Credentials; creds != nil && creds.UseSASL {
		cfg.Net.SASL.Enable = true
		cfg.Net.SASL.User = creds.Username
		cfg.Net.SASL.Password = creds.Password
		cfg.Net.SASL.Mechanism = sarama.SASLTypePlaintext
	}
	consumer, err := sarama.NewConsumer(config.Brokers, cfg)
	if err != nil {
		return nil, err
	}
	source := &KafkaDataSource{
		consumer:    consumer,
		brokers:     config.Brokers,
		topics:      config.Topics,
		groupID:     config.GroupID,
		credentials: config.Credentials,
	}
	return source, nil
}
// StartMessageConsumer starts one goroutine per partition of every
// configured topic, delivering each message payload to messageHandler.
// Topics whose partition list cannot be fetched are logged and skipped.
func (k *KafkaDataSource) StartMessageConsumer(ctx context.Context, messageHandler func([]byte) error) error {
	for _, topic := range k.topics {
		partitions, err := k.consumer.Partitions(topic)
		if err != nil {
			log.Printf("Error getting partitions for topic %s: %v", topic, err)
			continue
		}
		// topic and p are passed as arguments, so each goroutine gets its
		// own copy regardless of Go version.
		for _, p := range partitions {
			go k.consumePartition(ctx, topic, p, messageHandler)
		}
	}
	return nil
}
// consumePartition consumes one topic partition until ctx is cancelled,
// forwarding message values to messageHandler and logging errors.
func (k *KafkaDataSource) consumePartition(ctx context.Context, topic string, partition int32, messageHandler func([]byte) error) {
	pc, err := k.consumer.ConsumePartition(topic, partition, sarama.OffsetNewest)
	if err != nil {
		log.Printf("Error creating partition consumer: %v", err)
		return
	}
	defer pc.Close()
	for {
		select {
		case <-ctx.Done():
			return
		case msg := <-pc.Messages():
			// A nil message can arrive when the channel is closed.
			if msg == nil {
				continue
			}
			if handleErr := messageHandler(msg.Value); handleErr != nil {
				log.Printf("Error handling message: %v", handleErr)
			}
		case consumeErr := <-pc.Errors():
			log.Printf("Kafka consumer error: %v", consumeErr)
		}
	}
}
// Close shuts down the underlying sarama consumer and releases its
// broker connections.
func (k *KafkaDataSource) Close() error {
	err := k.consumer.Close()
	return err
}
3. 集成到OpenFaaS Gateway
现在我们需要将Kafka数据源集成到OpenFaaS的主要架构中,类似于现有的 main.go:106-119
// gateway/kafka/kafka_integration.go
package kafka
import (
"context"
"encoding/json"
"log"
"net/http"
"time"
"github.com/openfaas/faas/gateway/scaling"
"github.com/openfaas/faas/gateway/types"
)
// KafkaFunctionTrigger handles Kafka messages and triggers function invocations.
type KafkaFunctionTrigger struct {
	kafkaSource     *KafkaDataSource
	functionInvoker FunctionInvoker
	// NOTE(review): storing a context in a struct is generally discouraged
	// in Go; it is kept here to bound the consumer goroutines' lifetime.
	ctx    context.Context
	cancel context.CancelFunc
}
// FunctionInvoker abstracts how a named function is invoked with a payload.
type FunctionInvoker interface {
	InvokeFunction(functionName string, namespace string, body []byte) error
}
// KafkaMessage is the JSON envelope expected on the trigger topic.
type KafkaMessage struct {
	FunctionName string            `json:"function_name"`
	Namespace    string            `json:"namespace"`
	Payload      json.RawMessage   `json:"payload"` // forwarded verbatim as the function body
	Headers      map[string]string `json:"headers,omitempty"`
}
// NewKafkaFunctionTrigger wires a Kafka data source to a function invoker.
func NewKafkaFunctionTrigger(config KafkaSourceConfig, invoker FunctionInvoker) (*KafkaFunctionTrigger, error) {
	source, err := NewKafkaDataSource(config)
	if err != nil {
		return nil, err
	}
	// The cancelable context bounds the lifetime of all consumer goroutines.
	ctx, cancel := context.WithCancel(context.Background())
	return &KafkaFunctionTrigger{
		kafkaSource:     source,
		functionInvoker: invoker,
		ctx:             ctx,
		cancel:          cancel,
	}, nil
}
// Start begins consuming Kafka messages and dispatching them to functions.
func (k *KafkaFunctionTrigger) Start() error {
	err := k.kafkaSource.StartMessageConsumer(k.ctx, k.handleMessage)
	return err
}
// handleMessage decodes one Kafka envelope and invokes the target function.
func (k *KafkaFunctionTrigger) handleMessage(messageBytes []byte) error {
	var msg KafkaMessage
	err := json.Unmarshal(messageBytes, &msg)
	if err != nil {
		log.Printf("Error unmarshaling Kafka message: %v", err)
		return err
	}
	// Invoke the specified function with the raw payload.
	return k.functionInvoker.InvokeFunction(msg.FunctionName, msg.Namespace, msg.Payload)
}
// Stop cancels the consumer context (stopping all partition goroutines)
// and closes the Kafka connection.
func (k *KafkaFunctionTrigger) Stop() error {
	k.cancel()
	return k.kafkaSource.Close()
}
4. HTTP函数调用器实现
// gateway/kafka/function_invoker.go
package kafka
import (
"bytes"
"fmt"
"io"
"net/http"
"time"
)
// HTTPFunctionInvoker implements FunctionInvoker by POSTing the payload
// to the gateway's synchronous /function/ endpoint.
type HTTPFunctionInvoker struct {
	gatewayURL string
	client     *http.Client
	timeout    time.Duration // also applied as the client timeout
}
// NewHTTPFunctionInvoker creates an invoker targeting gatewayURL with the
// given per-request timeout.
func NewHTTPFunctionInvoker(gatewayURL string, timeout time.Duration) *HTTPFunctionInvoker {
	client := &http.Client{Timeout: timeout}
	return &HTTPFunctionInvoker{
		gatewayURL: gatewayURL,
		client:     client,
		timeout:    timeout,
	}
}
// InvokeFunction invokes a function via HTTP POST. A non-empty namespace
// selects "<name>.<namespace>"; responses with status >= 400 are turned
// into errors carrying the response body.
//
// Improvements: errors are wrapped with %w so callers can unwrap them,
// and a successful response body is drained so the underlying TCP
// connection can be reused by the transport.
func (h *HTTPFunctionInvoker) InvokeFunction(functionName string, namespace string, body []byte) error {
	url := fmt.Sprintf("%s/function/%s", h.gatewayURL, functionName)
	if namespace != "" {
		url = fmt.Sprintf("%s/function/%s.%s", h.gatewayURL, functionName, namespace)
	}
	req, err := http.NewRequest("POST", url, bytes.NewReader(body))
	if err != nil {
		return fmt.Errorf("error creating request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("X-Kafka-Trigger", "true")
	resp, err := h.client.Do(req)
	if err != nil {
		return fmt.Errorf("error invoking function: %w", err)
	}
	defer resp.Body.Close()
	if resp.StatusCode >= 400 {
		respBody, _ := io.ReadAll(resp.Body)
		return fmt.Errorf("function invocation failed with status %d: %s", resp.StatusCode, string(respBody))
	}
	// Drain the body so the connection is eligible for keep-alive reuse.
	io.Copy(io.Discard, resp.Body)
	return nil
}
5. 集成到主Gateway
最后,我们需要修改 main.go:70-73 来包含Kafka数据源:
// 在 gateway/main.go 中添加
import (
"github.com/openfaas/faas/gateway/kafka"
)
// Sketch of the additions to gateway/main.go: Kafka configuration and
// trigger startup (config fields are assumed to exist on the gateway's
// config type; elided code is marked with "...").
func main() {
	// ... existing code ...
	// Kafka configuration.
	if config.UseKafka() {
		log.Println("Kafka trigger enabled")
		kafkaConfig := kafka.KafkaSourceConfig{
			Brokers: config.KafkaBrokers,
			Topics:  config.KafkaTopics,
			GroupID: config.KafkaGroupID,
			Credentials: &kafka.KafkaCredentials{
				Username: config.KafkaUsername,
				Password: config.KafkaPassword,
				UseSASL:  config.KafkaUseSASL,
			},
		}
		functionInvoker := kafka.NewHTTPFunctionInvoker(
			config.FunctionsProviderURL.String(),
			config.UpstreamTimeout,
		)
		kafkaTrigger, err := kafka.NewKafkaFunctionTrigger(kafkaConfig, functionInvoker)
		if err != nil {
			log.Fatalf("Failed to create Kafka trigger: %v", err)
		}
		go func() {
			if err := kafkaTrigger.Start(); err != nil {
				log.Printf("Kafka trigger error: %v", err)
			}
		}()
		// Graceful shutdown.
		// NOTE(review): this defer only fires when main returns normally,
		// not on SIGTERM — confirm shutdown signaling elsewhere.
		defer kafkaTrigger.Stop()
	}
	// ... existing code continues ...
}
使用示例
要使用这个Kafka数据源,需要:
1. 配置环境变量:
export kafka_brokers="localhost:9092"
export kafka_topics="openfaas-triggers"
export kafka_group_id="openfaas-gateway"
export kafka_username="your-username"
export kafka_password="your-password"
export kafka_use_sasl="true"
2. 发送Kafka消息:
{
"function_name": "echo",
"namespace": "openfaas-fn",
"payload": {"message": "Hello from Kafka!"},
"headers": {"content-type": "application/json"}
}
这个实现基于OpenFaaS现有的架构模式,参考了 exporter.go:80-122 中的服务监控模式和 queue_proxy.go:24-68 中的异步处理模式。提供了一个Kafka数据源实现,可以无缝集成到OpenFaaS Gateway中,支持通过Kafka消息触发函数执行
OpenFaas支持有状态计算
实现Python计算任务的中间变量存储系统,利用OpenFaaS现有的NATS KeyValue存储接口和插件架构。
前置知识
Nats是OpenFaas的一个存储接口
NATS存储类型
● 1. 文件存储 (FileStorage)
这是NATS JetStream的默认存储类型,数据持久化到磁盘上。适用于需要数据持久性的场景。
● 2. 内存存储 (MemoryStorage)
数据仅存储在内存中,提供更快的访问速度但不具备持久性。
KeyValue存储可以通过以下配置进行定制: kv.go:250-268
1. 存储管理器实现
首先,基于现有的NATS KeyValue接口 ,实现一个Python任务存储管理器:
// gateway/storage/task_storage.go
package storage
import (
"context"
"encoding/json"
"fmt"
"log"
"time"
"github.com/nats-io/nats.go"
)
// PythonTaskStorage manages intermediate variables for Python computation
// tasks on top of two NATS JetStream KeyValue buckets.
type PythonTaskStorage struct {
	kv         nats.KeyValue // task metadata bucket ("python-tasks")
	js         nats.JetStreamContext
	taskBucket string // name of the metadata bucket
	varBucket  string // name of the variables bucket
}
// TaskVariable is one stored intermediate variable of a task.
type TaskVariable struct {
	TaskID    string      `json:"task_id"`
	VarName   string      `json:"var_name"`
	VarType   string      `json:"var_type"` // e.g. "int", "pickled_dict", "dataframe_json"
	Value     interface{} `json:"value"`
	Timestamp time.Time   `json:"timestamp"`
	// NOTE(review): TTL is only recorded in the payload; NATS KV applies
	// TTL per bucket, not per key — confirm this is the intended semantics.
	TTL time.Duration `json:"ttl,omitempty"`
}
// TaskMetadata tracks a task and the KV keys of its stored variables.
type TaskMetadata struct {
	TaskID       string            `json:"task_id"`
	FunctionName string            `json:"function_name"`
	Status       string            `json:"status"`
	Variables    map[string]string `json:"variables"` // var_name -> key mapping
	CreatedAt    time.Time         `json:"created_at"`
	UpdatedAt    time.Time         `json:"updated_at"`
}
// NewPythonTaskStorage creates the task storage manager, creating (or
// reusing, if creation fails because it already exists) the
// "python-tasks" and "python-variables" KeyValue buckets.
//
// Fix: the original declared varKV for the variables bucket but never
// read it ("declared and not used" compile error in Go); the create-or-
// get logic is also deduplicated into a small helper closure.
func NewPythonTaskStorage(nc *nats.Conn) (*PythonTaskStorage, error) {
	js, err := nc.JetStream()
	if err != nil {
		return nil, fmt.Errorf("failed to get JetStream context: %v", err)
	}
	// ensureBucket creates the bucket, falling back to opening an
	// existing one when creation fails.
	ensureBucket := func(cfg *nats.KeyValueConfig) (nats.KeyValue, error) {
		kv, createErr := js.CreateKeyValue(cfg)
		if createErr == nil {
			return kv, nil
		}
		return js.KeyValue(cfg.Bucket)
	}
	// Task metadata bucket.
	taskKV, err := ensureBucket(&nats.KeyValueConfig{
		Bucket:      "python-tasks",
		Description: "Python task metadata storage",
		TTL:         24 * time.Hour,
		History:     5,
		Storage:     nats.FileStorage,
	})
	if err != nil {
		return nil, fmt.Errorf("failed to create/get task bucket: %v", err)
	}
	// Variables bucket; opened on demand later via js.KeyValue(varBucket),
	// so only its existence matters here.
	if _, err := ensureBucket(&nats.KeyValueConfig{
		Bucket:      "python-variables",
		Description: "Python task variables storage",
		TTL:         24 * time.Hour,
		History:     10,
		Storage:     nats.FileStorage,
	}); err != nil {
		return nil, fmt.Errorf("failed to create/get variables bucket: %v", err)
	}
	return &PythonTaskStorage{
		kv:         taskKV,
		js:         js,
		taskBucket: "python-tasks",
		varBucket:  "python-variables",
	}, nil
}
// CreateTask registers a new Python computation task in the metadata
// bucket with status "created" and an empty variable map.
func (pts *PythonTaskStorage) CreateTask(taskID, functionName string) error {
	meta := TaskMetadata{
		TaskID:       taskID,
		FunctionName: functionName,
		Status:       "created",
		Variables:    map[string]string{},
		CreatedAt:    time.Now(),
		UpdatedAt:    time.Now(),
	}
	data, err := json.Marshal(meta)
	if err != nil {
		return fmt.Errorf("failed to marshal task metadata: %v", err)
	}
	if _, putErr := pts.kv.Put(taskID, data); putErr != nil {
		return putErr
	}
	return nil
}
// StoreVariable stores an intermediate variable for a task under the key
// "<taskID>:<varName>" and records that key in the task metadata.
func (pts *PythonTaskStorage) StoreVariable(taskID, varName string, value interface{}, varType string, ttl time.Duration) error {
	payload, err := json.Marshal(TaskVariable{
		TaskID:    taskID,
		VarName:   varName,
		VarType:   varType,
		Value:     value,
		Timestamp: time.Now(),
		TTL:       ttl,
	})
	if err != nil {
		return fmt.Errorf("failed to marshal variable: %v", err)
	}
	// NOTE(review): ':' is not in NATS KV's documented key character set
	// (A-Z, a-z, 0-9, '-', '/', '_', '=', '.'); confirm this separator is
	// accepted by the deployed server version.
	varKey := fmt.Sprintf("%s:%s", taskID, varName)
	bucket, err := pts.js.KeyValue(pts.varBucket)
	if err != nil {
		return fmt.Errorf("failed to get variables bucket: %v", err)
	}
	if _, err = bucket.Put(varKey, payload); err != nil {
		return fmt.Errorf("failed to store variable: %v", err)
	}
	// Record the variable key on the task so it can be enumerated later.
	return pts.updateTaskVariables(taskID, varName, varKey)
}
// GetVariable retrieves an intermediate variable previously stored with
// StoreVariable, addressed by the "<taskID>:<varName>" key.
func (pts *PythonTaskStorage) GetVariable(taskID, varName string) (*TaskVariable, error) {
	varKey := fmt.Sprintf("%s:%s", taskID, varName)
	bucket, err := pts.js.KeyValue(pts.varBucket)
	if err != nil {
		return nil, fmt.Errorf("failed to get variables bucket: %v", err)
	}
	entry, err := bucket.Get(varKey)
	if err != nil {
		return nil, fmt.Errorf("failed to get variable: %v", err)
	}
	variable := new(TaskVariable)
	if unmarshalErr := json.Unmarshal(entry.Value(), variable); unmarshalErr != nil {
		return nil, fmt.Errorf("failed to unmarshal variable: %v", unmarshalErr)
	}
	return variable, nil
}
// updateTaskVariables records varName -> varKey in the task's metadata
// using an optimistic, revision-checked update.
func (pts *PythonTaskStorage) updateTaskVariables(taskID, varName, varKey string) error {
	entry, err := pts.kv.Get(taskID)
	if err != nil {
		return fmt.Errorf("failed to get task metadata: %v", err)
	}
	var meta TaskMetadata
	if unmarshalErr := json.Unmarshal(entry.Value(), &meta); unmarshalErr != nil {
		return fmt.Errorf("failed to unmarshal task metadata: %v", unmarshalErr)
	}
	meta.Variables[varName] = varKey
	meta.UpdatedAt = time.Now()
	data, err := json.Marshal(meta)
	if err != nil {
		return fmt.Errorf("failed to marshal updated metadata: %v", err)
	}
	// NOTE(review): Update fails on a revision mismatch (concurrent
	// writer) with no retry — confirm callers tolerate that.
	_, err = pts.kv.Update(taskID, data, entry.Revision())
	return err
}
2. HTTP API处理器
基于现有的外部服务查询架构 ,实现存储API处理器:
// gateway/storage/storage_handler.go
package storage
import (
"encoding/json"
"fmt"
"net/http"
"time"
"github.com/gorilla/mux"
"github.com/openfaas/faas/gateway/middleware"
)
// StorageHandler handles HTTP requests for task storage.
type StorageHandler struct {
	storage      *PythonTaskStorage
	authInjector middleware.AuthInjector // injected auth; not applied in the handlers below
}
// NewStorageHandler creates a storage handler bound to the given task
// storage and auth injector.
func NewStorageHandler(storage *PythonTaskStorage, authInjector middleware.AuthInjector) *StorageHandler {
	handler := &StorageHandler{}
	handler.storage = storage
	handler.authInjector = authInjector
	return handler
}
// StoreVariableRequest represents a variable storage request.
type StoreVariableRequest struct {
	TaskID  string      `json:"task_id"`
	VarName string      `json:"var_name"`
	VarType string      `json:"var_type"`
	Value   interface{} `json:"value"`
	TTL     string      `json:"ttl,omitempty"` // Go duration string, e.g. "1h"
}
// GetVariableResponse represents a variable retrieval response.
type GetVariableResponse struct {
	TaskID    string      `json:"task_id"`
	VarName   string      `json:"var_name"`
	VarType   string      `json:"var_type"`
	Value     interface{} `json:"value"`
	Timestamp time.Time   `json:"timestamp"`
}
// HandleStoreVariable handles POST requests that store an intermediate
// variable. The body is a StoreVariableRequest; TTL, when present, must
// be a Go duration string (e.g. "1h").
//
// Fix: the JSON response now declares its Content-Type before the status
// line is written (headers set after WriteHeader are ignored).
func (sh *StorageHandler) HandleStoreVariable(w http.ResponseWriter, r *http.Request) {
	if r.Method != http.MethodPost {
		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
		return
	}
	var req StoreVariableRequest
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		http.Error(w, fmt.Sprintf("Invalid request body: %v", err), http.StatusBadRequest)
		return
	}
	// Parse TTL if provided.
	var ttl time.Duration
	if req.TTL != "" {
		var err error
		ttl, err = time.ParseDuration(req.TTL)
		if err != nil {
			http.Error(w, fmt.Sprintf("Invalid TTL format: %v", err), http.StatusBadRequest)
			return
		}
	}
	// Store the variable.
	if err := sh.storage.StoreVariable(req.TaskID, req.VarName, req.Value, req.VarType, ttl); err != nil {
		http.Error(w, fmt.Sprintf("Failed to store variable: %v", err), http.StatusInternalServerError)
		return
	}
	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(http.StatusCreated)
	json.NewEncoder(w).Encode(map[string]string{"status": "stored"})
}
// HandleGetVariable handles GET requests for a stored variable, addressed
// by the {taskId} and {varName} path variables.
func (sh *StorageHandler) HandleGetVariable(w http.ResponseWriter, r *http.Request) {
	if r.Method != http.MethodGet {
		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
		return
	}
	pathVars := mux.Vars(r)
	taskID, varName := pathVars["taskId"], pathVars["varName"]
	if taskID == "" || varName == "" {
		http.Error(w, "Missing taskId or varName", http.StatusBadRequest)
		return
	}
	variable, err := sh.storage.GetVariable(taskID, varName)
	if err != nil {
		http.Error(w, fmt.Sprintf("Failed to get variable: %v", err), http.StatusNotFound)
		return
	}
	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(GetVariableResponse{
		TaskID:    variable.TaskID,
		VarName:   variable.VarName,
		VarType:   variable.VarType,
		Value:     variable.Value,
		Timestamp: variable.Timestamp,
	})
}
// HandleCreateTask handles POST requests that register a new computation
// task (body: {"task_id": ..., "function_name": ...}).
//
// Fix: the JSON response now declares its Content-Type before the status
// line is written (headers set after WriteHeader are ignored).
func (sh *StorageHandler) HandleCreateTask(w http.ResponseWriter, r *http.Request) {
	if r.Method != http.MethodPost {
		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
		return
	}
	var req struct {
		TaskID       string `json:"task_id"`
		FunctionName string `json:"function_name"`
	}
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		http.Error(w, fmt.Sprintf("Invalid request body: %v", err), http.StatusBadRequest)
		return
	}
	if err := sh.storage.CreateTask(req.TaskID, req.FunctionName); err != nil {
		http.Error(w, fmt.Sprintf("Failed to create task: %v", err), http.StatusInternalServerError)
		return
	}
	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(http.StatusCreated)
	json.NewEncoder(w).Encode(map[string]string{"status": "created"})
}
3. 集成到主Gateway
基于现有的缓存和查询架构 ,将存储系统集成到主Gateway:
// 在 gateway/main.go 中添加存储系统初始化
import (
"github.com/openfaas/faas/gateway/storage"
"github.com/nats-io/nats.go"
)
// Sketch of the additions to gateway/main.go: NATS connection, task
// storage initialization, and HTTP route registration (elided code is
// marked with "...").
func main() {
	// ... existing code ...
	// Initialize the NATS connection.
	natsURL := os.Getenv("nats_url")
	if natsURL == "" {
		natsURL = "nats://localhost:4222"
	}
	nc, err := nats.Connect(natsURL)
	if err != nil {
		log.Fatalf("Failed to connect to NATS: %v", err)
	}
	defer nc.Close()
	// Initialize the Python task storage.
	taskStorage, err := storage.NewPythonTaskStorage(nc)
	if err != nil {
		log.Fatalf("Failed to initialize task storage: %v", err)
	}
	// Create the storage HTTP handler.
	storageHandler := storage.NewStorageHandler(taskStorage, serviceAuthInjector)
	// Register the storage API routes.
	r.HandleFunc("/system/storage/tasks", storageHandler.HandleCreateTask)
	r.HandleFunc("/system/storage/variables", storageHandler.HandleStoreVariable)
	r.HandleFunc("/system/storage/variables/{taskId}/{varName}", storageHandler.HandleGetVariable)
	// ... existing code continues ...
}
4. Python函数客户端库
# python_client/openfaas_storage.py
import json
import requests
import os
import pickle
import base64
from typing import Any, Optional, Dict, Union
from datetime import datetime
import logging
logger = logging.getLogger(__name__)
class OpenFaaSStorage:
    """OpenFaaS storage client for managing intermediate variables in Python functions."""

    def __init__(self, gateway_url: Optional[str] = None, task_id: Optional[str] = None):
        # Gateway URL and task id fall back to environment configuration.
        self.gateway_url = gateway_url or os.getenv('OPENFAAS_GATEWAY_URL', 'http://gateway:8080')
        self.task_id = task_id or os.getenv('TASK_ID')
        if not self.task_id:
            raise ValueError("TASK_ID environment variable is required")
        # Configure the HTTP session.
        self.session = requests.Session()
        self.session.headers.update({
            'Content-Type': 'application/json',
            'User-Agent': 'OpenFaaS-Python-Storage-Client/1.0'
        })
        # Attach basic-auth credentials to the session when provided.
        auth_user = os.getenv('OPENFAAS_AUTH_USER')
        auth_pass = os.getenv('OPENFAAS_AUTH_PASS')
        if auth_user and auth_pass:
            self.session.auth = (auth_user, auth_pass)

    def create_task(self, function_name: str) -> bool:
        """Create a new computation task; returns True on success."""
        url = f"{self.gateway_url}/system/storage/tasks"
        payload = {
            "task_id": self.task_id,
            "function_name": function_name
        }
        try:
            response = self.session.post(url, json=payload, timeout=10)
            response.raise_for_status()
            logger.info(f"Task {self.task_id} created successfully")
            return True
        except requests.RequestException as e:
            logger.error(f"Failed to create task: {e}")
            return False

    def store_variable(self, var_name: str, value: Any, var_type: Optional[str] = None,
                       ttl: Optional[str] = None, serialize: bool = True) -> bool:
        """Store an intermediate variable in the external storage."""
        url = f"{self.gateway_url}/system/storage/variables"
        # Auto-detect the variable type from the Python type name.
        if var_type is None:
            var_type = type(value).__name__
        # Serialize complex (non-scalar) objects.
        if serialize and not isinstance(value, (str, int, float, bool, type(None))):
            try:
                # Pickle the object, then base64-encode the bytes for JSON transport.
                # NOTE(review): unpickling on the read side executes arbitrary
                # code if the storage is writable by untrusted parties.
                serialized = pickle.dumps(value)
                encoded_value = base64.b64encode(serialized).decode('utf-8')
                var_type = f"pickled_{var_type}"
                value = encoded_value
            except Exception as e:
                logger.error(f"Failed to serialize variable {var_name}: {e}")
                return False
        payload = {
            "task_id": self.task_id,
            "var_name": var_name,
            "var_type": var_type,
            "value": value
        }
        if ttl:
            payload["ttl"] = ttl
        try:
            response = self.session.post(url, json=payload, timeout=10)
            response.raise_for_status()
            logger.info(f"Variable {var_name} stored successfully")
            return True
        except requests.RequestException as e:
            logger.error(f"Failed to store variable {var_name}: {e}")
            return False

    def get_variable(self, var_name: str, deserialize: bool = True) -> Optional[Any]:
        """Fetch an intermediate variable from storage; returns None on any failure."""
        url = f"{self.gateway_url}/system/storage/variables/{self.task_id}/{var_name}"
        try:
            response = self.session.get(url, timeout=10)
            response.raise_for_status()
            data = response.json()
            value = data.get('value')
            var_type = data.get('var_type', '')
            # Undo the pickle+base64 encoding for objects stored serialized.
            if deserialize and var_type.startswith('pickled_'):
                try:
                    decoded = base64.b64decode(value.encode('utf-8'))
                    value = pickle.loads(decoded)
                except Exception as e:
                    logger.error(f"Failed to deserialize variable {var_name}: {e}")
                    return None
            logger.info(f"Variable {var_name} retrieved successfully")
            return value
        except requests.RequestException as e:
            logger.error(f"Failed to get variable {var_name}: {e}")
            return None

    def store_dataframe(self, var_name: str, df, ttl: Optional[str] = None) -> bool:
        """Store a pandas DataFrame (records-oriented JSON when pandas is available)."""
        try:
            import pandas as pd
            if isinstance(df, pd.DataFrame):
                # Persist as records-oriented JSON.
                json_data = df.to_json(orient='records')
                return self.store_variable(var_name, json_data, 'dataframe_json', ttl, serialize=False)
            # NOTE(review): when pandas imports but df is not a DataFrame,
            # control falls through and returns None (falsy) — likely a bug.
        except ImportError:
            logger.warning("pandas not available, falling back to pickle serialization")
            return self.store_variable(var_name, df, 'dataframe', ttl)

    def get_dataframe(self, var_name: str):
        """Fetch a pandas DataFrame previously stored with store_dataframe."""
        try:
            import pandas as pd
            data = self.get_variable(var_name, deserialize=False)
            if data is None:
                return None
            # NOTE(review): get_variable returns only the value, never a dict
            # containing 'var_type', so this branch can never match a JSON-stored
            # frame — those come back as a raw JSON string. Verify against the
            # server response shape.
            if isinstance(data, dict) and data.get('var_type') == 'dataframe_json':
                return pd.read_json(data['value'], orient='records')
            # Otherwise try the deserializing path (pickled DataFrame).
            return self.get_variable(var_name, deserialize=True)
        except ImportError:
            logger.warning("pandas not available")
            return self.get_variable(var_name, deserialize=True)

    def store_numpy_array(self, var_name: str, array, ttl: Optional[str] = None) -> bool:
        """Store a numpy array (as list + shape + dtype when numpy is available)."""
        try:
            import numpy as np
            if isinstance(array, np.ndarray):
                # Persist as a JSON-friendly structure.
                array_data = {
                    'data': array.tolist(),
                    'shape': array.shape,
                    'dtype': str(array.dtype)
                }
                return self.store_variable(var_name, array_data, 'numpy_array', ttl, serialize=False)
            # NOTE(review): same fall-through-to-None issue as store_dataframe.
        except ImportError:
            logger.warning("numpy not available, falling back to pickle serialization")
            return self.store_variable(var_name, array, 'numpy_array', ttl)

    def get_numpy_array(self, var_name: str):
        """Fetch a numpy array previously stored with store_numpy_array."""
        try:
            import numpy as np
            data = self.get_variable(var_name, deserialize=False)
            if data is None:
                return None
            # Rebuild the array from the structured representation.
            if isinstance(data, dict) and 'data' in data and 'shape' in data:
                return np.array(data['data'], dtype=data.get('dtype')).reshape(data['shape'])
            # Otherwise try the deserializing path (pickled array).
            return self.get_variable(var_name, deserialize=True)
        except ImportError:
            logger.warning("numpy not available")
            return self.get_variable(var_name, deserialize=True)
# Managed-storage decorator.
def with_storage(function_name: str = None):
    """Decorator that injects an OpenFaaSStorage client into the wrapped
    function as the keyword argument ``storage`` and registers a task for
    it before each call.

    Args:
        function_name: task name to register; defaults to the wrapped
            function's ``__name__``.

    Fix: the wrapper now uses ``functools.wraps`` so the decorated
    function keeps its ``__name__``/``__doc__`` metadata.
    """
    def decorator(func):
        import functools

        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # Build a storage client from the environment (TASK_ID, gateway URL).
            storage = OpenFaaSStorage()
            # Register the task under the decorator name or the function name.
            func_name = function_name or func.__name__
            storage.create_task(func_name)
            # Inject the client so the handler can store/fetch variables.
            kwargs['storage'] = storage
            return func(*args, **kwargs)
        return wrapper
    return decorator
5. Python函数示例
# example_function/handler.py
import json
import numpy as np
import pandas as pd
from openfaas_storage import OpenFaaSStorage, with_storage
@with_storage("data-processing")
def handle(req, storage: OpenFaaSStorage):
    """Example Python computation function showing how to use the storage system.

    Drives a three-step pipeline selected by the request's 'step' field:
    'start' -> 'process' -> 'finalize'.
    """
    try:
        # Parse the input (accepts a JSON string or an already-parsed dict).
        input_data = json.loads(req) if isinstance(req, str) else req
        step = input_data.get('step', 'start')
        if step == 'start':
            # Step 1: data preprocessing.
            raw_data = input_data.get('data', [])
            # Build a DataFrame from the raw records.
            df = pd.DataFrame(raw_data)
            storage.store_dataframe('raw_dataframe', df, ttl='1h')
            # Build a numpy working array.
            array = np.array([1, 2, 3, 4, 5])
            storage.store_numpy_array('processing_array', array, ttl='1h')
            # Persist intermediate bookkeeping.
            intermediate_result = {'processed_count': len(raw_data)}
            storage.store_variable('intermediate_result', intermediate_result, ttl='1h')
            return {
                'status': 'step1_complete',
                'message': 'Data preprocessing completed',
                'next_step': 'process'
            }
        elif step == 'process':
            # Step 2: data processing using the stored intermediates.
            df = storage.get_dataframe('raw_dataframe')
            array = storage.get_numpy_array('processing_array')
            intermediate = storage.get_variable('intermediate_result')
            if df is None or array is None:
                return {'error': 'Missing intermediate data'}
            # Perform the computation.
            processed_df = df.copy()
            processed_df['computed'] = processed_df.iloc[:, 0] * array[0] if len(df) > 0 else 0
            # Store the processed result.
            storage.store_dataframe('processed_dataframe', processed_df, ttl='2h')
            # Update the bookkeeping record.
            intermediate['processing_complete'] = True
            storage.store_variable('intermediate_result', intermediate, ttl='2h')
            return {
                'status': 'step2_complete',
                'message': 'Data processing completed',
                'next_step': 'finalize'
            }
        elif step == 'finalize':
            # Step 3: finalization.
            processed_df = storage.get_dataframe('processed_dataframe')
            intermediate = storage.get_variable('intermediate_result')
            if processed_df is None:
                return {'error': 'Missing processed data'}
            # Produce the final summary.
            final_result = {
                'total_rows': len(processed_df),
                'computed_sum': processed_df['computed'].sum() if 'computed' in processed_df.columns else 0,
                'metadata': intermediate
            }
            # Keep the final result available for a day.
            storage.store_variable('final_result', final_result, ttl='24h')
            return {
                'status': 'complete',
                'result': final_result
            }
        else:
            return {'error': f'Unknown step: {step}'}
    except Exception as e:
        return {'error': str(e)}
# 不使用装饰器的版本
def handle_manual(req):
    """Demonstrates driving OpenFaaSStorage by hand, without the decorator."""
    store = OpenFaaSStorage()
    # Register the task explicitly instead of relying on @with_storage.
    store.create_task('manual-processing')
    # Round-trip a variable through the store to show basic usage.
    store.store_variable('temp_var', {'key': 'value'})
    return {'stored_and_retrieved': store.get_variable('temp_var')}
6. Python函数req变量的mock数据示例
1. 基本的JSON请求数据
# 第一步:数据预处理
req_step1 = {
"step": "start",
"data": [
{"id": 1, "value": 10, "category": "A"},
{"id": 2, "value": 20, "category": "B"},
{"id": 3, "value": 15, "category": "A"},
{"id": 4, "value": 25, "category": "C"}
],
"task_config": {
"batch_size": 100,
"timeout": 300
}
}
2. 数据处理步骤的请求
# 第二步:数据处理
req_step2 = {
"step": "process",
"processing_options": {
"algorithm": "linear_regression",
"normalize": True,
"feature_columns": ["value"]
}
}
3. 最终化步骤的请求
# 第三步:最终化
req_step3 = {
"step": "finalize",
"output_format": "json",
"include_metadata": True,
"export_options": {
"compress": False,
"format": "csv"
}
}
4. 复杂数据结构的请求
# 包含numpy数组和pandas DataFrame数据的请求
req_complex = {
"step": "start",
"data": {
"matrix_data": [
[1.0, 2.0, 3.0],
[4.0, 5.0, 6.0],
[7.0, 8.0, 9.0]
],
"time_series": {
"timestamps": ["2023-01-01", "2023-01-02", "2023-01-03"],
"values": [100, 150, 120]
},
"metadata": {
"source": "sensor_data",
"version": "1.0",
"created_at": "2023-12-01T10:00:00Z"
}
},
"processing_params": {
"window_size": 5,
"overlap": 0.5,
"method": "moving_average"
}
}
5. 错误处理场景的请求
# 无效步骤的请求
req_invalid = {
"step": "unknown_step",
"data": []
}
# 缺少前置状态的请求(注意:process 步骤并不要求 req 携带 data 字段,
# 它依赖 start 步骤预先存入的中间数据;若未先执行 start 直接调用,
# 函数会返回 'Missing intermediate data')
req_missing = {
    "step": "process"
}
6. 实际使用中的完整示例
# 模拟实际HTTP请求中的req变量
def test_python_function():
    """Drives the three-step data-processor handler with the mock requests defined above."""
    from handler import handle
    # Step 1: preprocessing (stores raw dataframe and scratch array)
    result1 = handle(json.dumps(req_step1))
    print("Step 1 result:", result1)
    # Step 2: processing (reads back the stored intermediates)
    result2 = handle(json.dumps(req_step2))
    print("Step 2 result:", result2)
    # Step 3: finalization (summarizes and persists the final result)
    result3 = handle(json.dumps(req_step3))
    print("Step 3 result:", result3)
7. 窗口算法 Python 函数实现
基于之前实现的存储系统,将创建一个支持多种窗口算法的Python函数:
# window_algorithm_function/handler.py
import json
import numpy as np
import pandas as pd
from typing import List, Dict, Any, Optional, Union
from openfaas_storage import OpenFaaSStorage, with_storage
class WindowAlgorithms:
    """Stateless implementations of common stream-windowing algorithms.

    All methods are static: they take raw numpy arrays and return windowed
    results as new arrays/lists, leaving storage concerns to the caller.
    """

    @staticmethod
    def sliding_window(data: np.ndarray, window_size: int, step: int = 1) -> np.ndarray:
        """Return overlapping windows of `window_size`, advancing by `step`.

        If the input is shorter than `window_size`, the whole input is
        returned as a single (short) window, preserving the original contract.
        """
        if len(data) < window_size:
            return np.array([data])
        windows = [data[i:i + window_size]
                   for i in range(0, len(data) - window_size + 1, step)]
        return np.array(windows)

    @staticmethod
    def tumbling_window(data: np.ndarray, window_size: int) -> np.ndarray:
        """Return consecutive non-overlapping windows.

        A trailing partial window is discarded so every row of the result
        has exactly `window_size` elements.
        """
        windows = [data[i:i + window_size]
                   for i in range(0, len(data), window_size)
                   if len(data[i:i + window_size]) == window_size]
        return np.array(windows)

    @staticmethod
    def hopping_window(data: np.ndarray, window_size: int, hop_size: int) -> np.ndarray:
        """Return windows of `window_size` whose start indices are `hop_size` apart."""
        windows = [data[i:i + window_size]
                   for i in range(0, len(data) - window_size + 1, hop_size)]
        return np.array(windows)

    @staticmethod
    def session_window(data: np.ndarray, timestamps: np.ndarray, gap_threshold: float) -> List[np.ndarray]:
        """Split `data` into sessions: a new session starts whenever the gap
        between consecutive timestamps exceeds `gap_threshold`.

        Returns:
            List of numpy arrays, one per session (possibly empty).

        Raises:
            ValueError: if `data` and `timestamps` differ in length.
        """
        if len(data) != len(timestamps):
            raise ValueError("Data and timestamps must have the same length")
        # Fix: the original indexed data[0] unconditionally, which raised
        # IndexError on empty input; an empty input simply has no sessions.
        if len(data) == 0:
            return []
        sessions = []
        current_session = [data[0]]
        for i in range(1, len(data)):
            if timestamps[i] - timestamps[i - 1] <= gap_threshold:
                current_session.append(data[i])
            else:
                sessions.append(np.array(current_session))
                current_session = [data[i]]
        # current_session is always non-empty here for non-empty input.
        sessions.append(np.array(current_session))
        return sessions
class WindowProcessor:
    """Runs window algorithms over input data and persists the intermediates.

    Couples WindowAlgorithms with an OpenFaaSStorage instance so that every
    processing call leaves both the raw windows and the aggregated results
    in task-scoped storage for later analysis steps.
    """

    # Maps aggregation names to the numpy reduction applied per window row.
    _AGG_FUNCS = {
        'mean': np.mean,
        'sum': np.sum,
        'max': np.max,
        'min': np.min,
        'std': np.std,
        'var': np.var,
        'median': np.median,
    }

    def __init__(self, storage: OpenFaaSStorage):
        self.storage = storage
        self.algorithms = WindowAlgorithms()

    def process_sliding_window(self, data: np.ndarray, config: Dict[str, Any]) -> Dict[str, Any]:
        """Slice `data` into sliding windows, aggregate, persist, and report."""
        size = config.get('window_size', 5)
        stride = config.get('step', 1)
        agg = config.get('aggregation', 'mean')
        windows = self.algorithms.sliding_window(data, size, stride)
        aggregated = self._apply_aggregation(windows, agg)
        # Persist both the raw windows and their per-window reductions.
        self.storage.store_numpy_array('sliding_windows', windows, ttl='1h')
        self.storage.store_variable('sliding_results', aggregated.tolist(), ttl='1h')
        return {
            'window_type': 'sliding',
            'window_count': len(windows),
            'aggregated_results': aggregated.tolist(),
            'config': config,
        }

    def process_tumbling_window(self, data: np.ndarray, config: Dict[str, Any]) -> Dict[str, Any]:
        """Slice `data` into tumbling windows, aggregate, persist, and report."""
        size = config.get('window_size', 5)
        agg = config.get('aggregation', 'mean')
        windows = self.algorithms.tumbling_window(data, size)
        aggregated = self._apply_aggregation(windows, agg)
        self.storage.store_numpy_array('tumbling_windows', windows, ttl='1h')
        self.storage.store_variable('tumbling_results', aggregated.tolist(), ttl='1h')
        return {
            'window_type': 'tumbling',
            'window_count': len(windows),
            'aggregated_results': aggregated.tolist(),
            'config': config,
        }

    def process_hopping_window(self, data: np.ndarray, config: Dict[str, Any]) -> Dict[str, Any]:
        """Slice `data` into hopping windows, aggregate, persist, and report."""
        size = config.get('window_size', 5)
        hop = config.get('hop_size', 2)
        agg = config.get('aggregation', 'mean')
        windows = self.algorithms.hopping_window(data, size, hop)
        aggregated = self._apply_aggregation(windows, agg)
        self.storage.store_numpy_array('hopping_windows', windows, ttl='1h')
        self.storage.store_variable('hopping_results', aggregated.tolist(), ttl='1h')
        return {
            'window_type': 'hopping',
            'window_count': len(windows),
            'aggregated_results': aggregated.tolist(),
            'config': config,
        }

    def process_session_window(self, data: np.ndarray, timestamps: np.ndarray, config: Dict[str, Any]) -> Dict[str, Any]:
        """Partition `data` into sessions by timestamp gaps, aggregate each."""
        gap = config.get('gap_threshold', 5.0)
        agg = config.get('aggregation', 'mean')
        sessions = self.algorithms.session_window(data, timestamps, gap)
        per_session = []
        for idx, session in enumerate(sessions):
            # Reshape to a single row so the per-window reducer applies once.
            per_session.append(self._apply_aggregation(session.reshape(1, -1), agg)[0])
            # Each session is stored individually under an indexed key.
            self.storage.store_numpy_array(f'session_{idx}', session, ttl='1h')
        self.storage.store_variable('session_results', per_session, ttl='1h')
        return {
            'window_type': 'session',
            'session_count': len(sessions),
            'session_sizes': [len(s) for s in sessions],
            'aggregated_results': per_session,
            'config': config,
        }

    def _apply_aggregation(self, windows: np.ndarray, aggregation: str) -> np.ndarray:
        """Reduce each window (row) with the named aggregation function."""
        func = self._AGG_FUNCS.get(aggregation)
        if func is None:
            raise ValueError(f"Unsupported aggregation function: {aggregation}")
        return func(windows, axis=1)
@with_storage("window-algorithm-processor")
def handle(req, storage: OpenFaaSStorage):
    """Entry point: route the request to the process/analyze/compare step."""
    try:
        # Accept either a raw JSON string or a pre-parsed mapping.
        payload = json.loads(req) if isinstance(req, str) else req
        step = payload.get('step', 'process')
        # Dispatch table keeps the routing flat and easy to extend.
        handlers = {
            'process': process_window_algorithm,
            'analyze': analyze_window_results,
            'compare': compare_window_algorithms,
        }
        step_fn = handlers.get(step)
        if step_fn is None:
            return {'error': f'Unknown step: {step}'}
        return step_fn(payload, storage)
    except Exception as e:
        # Surface the failure to the caller rather than crashing the function.
        return {'error': str(e)}
def process_window_algorithm(input_data: Dict[str, Any], storage: OpenFaaSStorage) -> Dict[str, Any]:
    """Run the configured window algorithm over the request's data array.

    Reads 'data', optional 'timestamps', and 'window_config' from the
    request, delegates to WindowProcessor, and stores the raw inputs so
    the analyze/compare steps can reuse them.
    """
    data_array = np.array(input_data.get('data', []))
    timestamps = np.array(input_data.get('timestamps', []))
    window_config = input_data.get('window_config', {})
    window_type = window_config.get('type', 'sliding')

    if len(data_array) == 0:
        return {'error': 'No data provided'}

    processor = WindowProcessor(storage)

    if window_type == 'session':
        # Session windows additionally need event-time information.
        if len(timestamps) == 0:
            return {'error': 'Timestamps required for session windows'}
        result = processor.process_session_window(data_array, timestamps, window_config)
    elif window_type == 'sliding':
        result = processor.process_sliding_window(data_array, window_config)
    elif window_type == 'tumbling':
        result = processor.process_tumbling_window(data_array, window_config)
    elif window_type == 'hopping':
        result = processor.process_hopping_window(data_array, window_config)
    else:
        return {'error': f'Unsupported window type: {window_type}'}

    # Keep the raw inputs around for downstream steps.
    storage.store_numpy_array('original_data', data_array, ttl='2h')
    if len(timestamps) > 0:
        storage.store_numpy_array('timestamps', timestamps, ttl='2h')

    return {
        'status': 'success',
        'message': f'{window_type.capitalize()} window processing completed',
        'result': result,
        'data_size': len(data_array)
    }
def analyze_window_results(input_data: Dict[str, Any], storage: OpenFaaSStorage) -> Dict[str, Any]:
    """Compute summary statistics over previously stored window results.

    Looks up '<window_type>_results' in storage, summarizes it, and writes
    the summary back under '<window_type>_analysis'.
    """
    window_type = input_data.get('window_type', 'sliding')
    stored = storage.get_variable(f'{window_type}_results')
    if stored is None:
        return {'error': f'No results found for {window_type} windows'}

    values = np.array(stored)
    # Cast to plain floats so the analysis dict stays JSON-serializable.
    stats = {
        'mean': float(np.mean(values)),
        'std': float(np.std(values)),
        'min': float(np.min(values)),
        'max': float(np.max(values)),
        'median': float(np.median(values)),
        'q25': float(np.percentile(values, 25)),
        'q75': float(np.percentile(values, 75)),
    }
    analysis = {
        'window_type': window_type,
        'total_windows': len(values),
        'statistics': stats,
    }
    storage.store_variable(f'{window_type}_analysis', analysis, ttl='2h')
    return {
        'status': 'success',
        'message': f'{window_type.capitalize()} window analysis completed',
        'analysis': analysis
    }
def compare_window_algorithms(input_data: Dict[str, Any], storage: OpenFaaSStorage) -> Dict[str, Any]:
    """Summarize and contrast stored results across window algorithm types."""
    window_types = input_data.get('window_types', ['sliding', 'tumbling', 'hopping'])
    comparison = {}
    for wtype in window_types:
        stored = storage.get_variable(f'{wtype}_results')
        if stored is None:
            # Skip algorithms whose process step has not run yet.
            continue
        values = np.array(stored)
        comparison[wtype] = {
            'count': len(values),
            'mean': float(np.mean(values)),
            'std': float(np.std(values)),
            'range': float(np.max(values) - np.min(values)),
        }
    storage.store_variable('window_comparison', comparison, ttl='2h')
    return {
        'status': 'success',
        'message': 'Window algorithm comparison completed',
        'comparison': comparison
    }
使用示例
# 调用示例
import requests
import json
import numpy as np
# 生成测试数据
data = np.random.randn(100).tolist()
timestamps = np.arange(100).tolist()
# 滑动窗口处理
sliding_request = {
"step": "process",
"data": data,
"window_config": {
"type": "sliding",
"window_size": 10,
"step": 2,
"aggregation": "mean"
}
}
response = requests.post(
'http://gateway:8080/function/window-algorithm-processor',
json=sliding_request,
headers={'X-Task-ID': 'window-task-123'}
)
print("Sliding window result:", response.json())
# 会话窗口处理
session_request = {
"step": "process",
"data": data,
"timestamps": timestamps,
"window_config": {
"type": "session",
"gap_threshold": 5.0,
"aggregation": "mean"
}
}
response = requests.post(
'http://gateway:8080/function/window-algorithm-processor',
json=session_request,
headers={'X-Task-ID': 'window-task-123'}
)
print("Session window result:", response.json())
8. 配置文件
# function.yml
version: 1.0
provider:
name: openfaas
gateway: http://127.0.0.1:8080
functions:
data-processor:
lang: python3
handler: ./handler
image: data-processor:latest
environment:
OPENFAAS_GATEWAY_URL: "http://gateway:8080"
NATS_URL: "nats://nats:4222"
secrets:
- openfaas-auth
9. 部署脚本
#!/bin/bash
# deploy.sh - build and deploy the OpenFaaS functions.

# Create the auth secret FIRST: functions declared in function.yml list
# 'openfaas-auth' under secrets, so it must already exist in the
# openfaas-fn namespace when they are deployed (original script created
# it after deploy, which can make the deployment fail to start).
kubectl create secret generic openfaas-auth \
  --from-literal=username=admin \
  --from-literal=password=password \
  -n openfaas-fn

# Build the function images.
faas-cli build -f function.yml

# Deploy the functions to the gateway.
faas-cli deploy -f function.yml
使用示例
# 调用示例
import requests
import json
# 第一步调用
response1 = requests.post(
'http://gateway:8080/function/data-processor',
json={
'step': 'start',
'data': [{'id': 1, 'value': 10}, {'id': 2, 'value': 20}]
},
headers={'X-Task-ID': 'task-123'}
)
# 第二步调用
response2 = requests.post(
'http://gateway:8080/function/data-processor',
json={'step': 'process'},
headers={'X-Task-ID': 'task-123'}
)
# 第三步调用
response3 = requests.post(
'http://gateway:8080/function/data-processor',
json={'step': 'finalize'},
headers={'X-Task-ID': 'task-123'}
)
print(response3.json())