Loading...
Loading...
Guides Python SDK development in Apache Beam, including environment setup, testing, building, and running pipelines. Use when working with Python code in sdks/python/.
npx skill4agent add apache/beam python-development
sdks/python/
apache_beam/
transforms/
io/
ml/
runners/
runners/worker/
container/
test-suites/
scripts/
setup.py
pyproject.toml
tox.ini
pytest.ini
.pylintrc
.isort.cfg
mypy.ini
# Install Python
pyenv install 3.X # Use supported version from gradle.properties
# Create virtual environment
pyenv virtualenv 3.X beam-dev
pyenv activate beam-dev
cd sdks/python
pip install -e .[gcp,test]
pip install pre-commit
pre-commit install
# To disable
pre-commit uninstall
*_test.py
# Run all tests in a file
pytest -v apache_beam/io/textio_test.py
# Run tests in a class
pytest -v apache_beam/io/textio_test.py::TextSourceTest
# Run a specific test
pytest -v apache_beam/io/textio_test.py::TextSourceTest::test_progress
*_it_test.py
python -m pytest -o log_cli=True -o log_level=Info \
apache_beam/ml/inference/pytorch_inference_it_test.py::PyTorchInference \
--test-pipeline-options='--runner=TestDirectRunner'
# First build SDK tarball
pip install build && python -m build --sdist
# Run integration test
python -m pytest -o log_cli=True -o log_level=Info \
apache_beam/ml/inference/pytorch_inference_it_test.py::PyTorchInference \
--test-pipeline-options='--runner=TestDataflowRunner --project=<project>
--temp_location=gs://<bucket>/tmp
--sdk_location=dist/apache-beam-2.XX.0.dev0.tar.gz
--region=us-central1'
cd sdks/python
pip install build && python -m build --sdist
# Output: sdks/python/dist/apache-beam-X.XX.0.dev0.tar.gz
./gradlew :sdks:python:bdistPy311linux # For Python 3.11 on Linux
./gradlew :sdks:python:container:py311:docker \
-Pdocker-repository-root=gcr.io/your-project/your-name \
-Pdocker-tag=custom \
-Ppush-containers
# Container image will be pushed to: gcr.io/your-project/your-name/beam_python3.11_sdk:custom
--sdk_container_image
# Install modified SDK
pip install /path/to/apache-beam.tar.gz[gcp]
# Run pipeline
python my_pipeline.py \
--runner=DataflowRunner \
--sdk_location=/path/to/apache-beam.tar.gz \
--project=my_project \
--region=us-central1 \
--temp_location=gs://my-bucket/temp
NameError
--save_main_session
--requirements_file=requirements.txt
@pytest.mark.it_postcommit
# Run WordCount
./gradlew :sdks:python:wordCount
# Check environment
./gradlew :checkSetup
# Linting
pylint apache_beam/
# Type checking
mypy apache_beam/
# Formatting (via yapf)
yapf -i apache_beam/file.py
# Import sorting
isort apache_beam/file.py