arcpy原生脚本——“按属性分割”脚本解读

# coding=utf-8
"""
Source code for potential gp tool to create outputs based on attributes
of an input.
"""

import arcpy
import numbers
import sys

# Python 2/3 compatibility shim: Python 3 has no ``unicode`` builtin, so the
# name is aliased to ``str`` there. The bare ``unicode`` expression is not a
# call; it merely looks the name up, which raises NameError when the name is
# not defined in the current namespace.
try:
    unicode
except NameError:
    unicode = str

# Collect the unique value combinations of the given field(s)
def get_unique_values(in_data, fields):
    """
    Identify all unique values for field(s) in a data source

    :param in_data: Input data source
    :param fields: List of field names; unique combinations of their values
        are collected
    :return: A list of rows, one per unique combination of values, with the
        values in the same order as ``fields``
    """

    # Respect extent environment where possible: when an extent is set,
    # read from a temporary layer holding only the intersecting features.
    temp_layer = None
    if arcpy.env.extent:
        lyr_name = 'sbyloc_extent'
        try:
            lyr = arcpy.MakeFeatureLayer_management(in_data, lyr_name)[0]
            # The layer now exists and must be removed again before returning
            temp_layer = lyr_name
            arcpy.SelectLayerByLocation_management(lyr, 'INTERSECT',
                                                   arcpy.env.extent.polygon)
            in_data = lyr_name
        except arcpy.ExecuteError:
            # Best effort only: fall back to the unfiltered input
            pass

    # Use Statistics instead of Frequency to avoid licensing limitations.
    # No statistic is actually needed; FIRST on the first field is just a
    # placeholder, the case fields produce the unique combinations.
    table_name = arcpy.CreateUniqueName('stats', 'in_memory')
    try:
        arcpy.Statistics_analysis(in_data, table_name,
                                  [(fields[0], 'FIRST')], fields)
        # The context manager releases the cursor before the table is deleted
        with arcpy.da.SearchCursor(table_name, fields) as cursor:
            atts = [row for row in cursor]
    finally:
        # Should delete, but don't fail for intermediate data issues
        for leftover in (table_name, temp_layer):
            if leftover is None:
                continue
            try:
                arcpy.Delete_management(leftover)
            except arcpy.ExecuteError:
                pass

    return atts

# Build a unique output name from one combination of field values
def create_name(workspace, name, extension):
    """
    Build a unique output pathname from a combination of field values

    :param workspace: The workspace that an expected output will be written to
    :param name: Base name of the output (list of values to be combined)
    :param extension: Extension including the leading period
    :return: A unique name, including pathname
    """

    # Several attribute values are combined into a single base name
    pieces = [unicode(part) for part in name]
    base = u'_'.join(pieces)

    # Drop both kinds of quote characters from the base name
    for quote in ('"', "'"):
        base = base.replace(quote, '')

    # name of '' validated to '', so fall back to a placeholder
    if not base:
        base = 'T'

    table_name = arcpy.ValidateTableName(base, workspace)
    candidate = u'{}{}'.format(table_name, extension)

    return arcpy.CreateUniqueName(candidate, workspace)


def create_expression(in_data, field_name, value):
    """
    Create a SQL Expression

    Numbers are compared unquoted, None becomes an IS NULL test, and any
    other value is compared as a single-quoted string literal.

    :param in_data: Input data source
    :param field_name: The field name that will be queried
    :param value: The value in the field that will be queried for
    :return: SQL expression
    """

    delimited_field = arcpy.AddFieldDelimiters(in_data, field_name)
    if isinstance(value, numbers.Number):
        return u'{} = {}'.format(delimited_field, value)
    if value is None:
        return u'{} IS NULL'.format(delimited_field)

    # Standard SQL escaping: a single quote delimits a string literal, so a
    # quote inside the value is doubled to keep the literal from ending
    # early, e.g. O'Reilly -> O''Reilly. Only quotes that are part of the
    # value are affected; double quotes are left untouched.
    # NOTE(review): assumes value is a string at this point; other
    # non-numeric types (e.g. dates) are not handled here - confirm callers.
    escaped = value.replace("'", "''")
    return u''' %s = '%s' ''' % (delimited_field, escaped)

def select(datatype, *args):
    """
    Data type non-specific Select tool handling

    Runs arcpy.analysis.Select for feature data and
    arcpy.analysis.TableSelect for everything else.

    :param datatype: arcpy.Describe datatype keyword
    :param args: arguments for Select/TableSelect tools
    :return: None
    """

    # NOTE(review): any other keyword (possibly including 'ShapeFile') falls
    # through to TableSelect - confirm that this is intended.
    feature_data = datatype in ('FeatureClass', 'FeatureLayer')
    tool_name = 'Select' if feature_data else 'TableSelect'

    # Plain attribute lookup instead of eval() on a formatted string
    tool = getattr(arcpy.analysis, tool_name)
    tool(*args)


def split_by_atts(in_data, out_workspace, fields):
    """
    Split a data source into a series of outputs based on the unique
    combinations of values in one or more fields.

    :param in_data: The input data source
    :param out_workspace: The output workspace that data will be written to
    :param fields: Unique values in these fields will be used to split the data
    :return: A list of output pathnames (output has been created)
    """

    # Python 2/3 compatibility: itertools.izip (a lazy zip) only exists on
    # Python 2; on Python 3 the builtin zip already returns an iterator.
    # Either one pairs items up and stops at the shortest input.
    try:
        from itertools import izip
    except ImportError:
        izip = zip

    # Data type keyword of the input (e.g. a feature class)
    datatype = arcpy.Describe(in_data).datatype

    # Every unique combination of values found in the split fields
    unique_values = get_unique_values(in_data, fields)

    # If output workspace is a folder add a .shp extension
    workspace_type = arcpy.Describe(out_workspace).datatype
    extension = '.shp' if workspace_type == 'Folder' else ''

    # Step progressor arguments: type, label, minimum, maximum, step size
    arcpy.SetProgressor('STEP', '', 0, len(unique_values), 1)

    outputs = []
    for combo in unique_values:
        # Unique output pathname derived from this value combination
        output = create_name(out_workspace, combo, extension)

        # One SQL clause per (field, value) pair, all of which must match
        clauses = []
        for field_name, field_value in izip(fields, combo):
            clauses.append(
                create_expression(in_data, field_name, field_value))
        expression = u' AND '.join(clauses)

        # Write the matching rows to the new output
        select(datatype, in_data, output, expression)

        # Report the value combination and its output with message ID 86245
        values_string = u', '.join([unicode(v) for v in combo])
        arcpy.AddIDMessage('INFORMATIVE', 86245, values_string, output)

        outputs.append(output)

        # Advance the progressor by one step
        arcpy.SetProgressorPosition()

    return outputs


if __name__ == '__main__':
    # Tool parameters: 0 = input data, 1 = output workspace,
    # 2 = list of parameter objects whose .value gives each split field
    in_data = arcpy.GetParameterAsText(0)
    out_workspace = arcpy.GetParameterAsText(1)
    fields = [param.value for param in arcpy.GetParameter(2)]

    try:
        split_by_atts(in_data, out_workspace, fields)
        # Expose the output workspace as parameter 3. Running the script on
        # its own this makes no difference, but when the tool is chained
        # with others the next tool can pick this value up and carry on.
        arcpy.SetParameterAsText(3, out_workspace)
    except Exception as err:
        # Surface the failure as a tool error and exit with a non-zero code
        message = str(err)
        arcpy.AddError(message)
        sys.exit(1)
按调用流程解读代码：找到属性唯一值（get_unique_values）——> 将一个或多个属性值组合为文件名（create_name）——> 构建 SQL 表达式（create_expression）——> 选择要素并输出（select）——> 将前面的函数串联起来（split_by_atts）——> 主程序入口。
这段代码中有一些我尚未掌握的知识点：izip、replace("'", "''")、arcpy.env.extent、SetProgressor，以及注释的写法；此外，代码反复使用了 CreateUniqueName、join、ValidateTableName、AddFieldDelimiters，使代码简洁且容易理解。除此之外，这段代码的风格也值得反复品读。