# =============================================================================
# STAT 418: Computational Methods in Data Science
# Requirements File
# =============================================================================
# Course: STAT 418 (Spring 2026)
# Instructor: Dr. Timothy Reese
# Last Updated: January 2026
#
# Installation: pip install -r requirements.txt
# Note: Some Bayesian packages (e.g., cmdstanpy) require additional setup steps.
#
# Format: one requirement specifier per line. pip ignores everything after a
# '#', so a specifier must never share a line with a preceding comment.
# =============================================================================

# -----------------------------------------------------------------------------
# CORE SCIENTIFIC COMPUTING
# Essential packages for numerical computation and data manipulation
# -----------------------------------------------------------------------------
numpy>=1.24.0                    # Array operations, random generation (Generator API)
scipy>=1.11.0                    # Statistical functions, optimization, integration
pandas>=2.0.0                    # DataFrames, data wrangling

# -----------------------------------------------------------------------------
# STATISTICAL MODELING (Frequentist)
# Linear models, GLMs, and statistical tests
# -----------------------------------------------------------------------------
statsmodels>=0.14.0              # GLMs, OLS, time series, statistical tests
scikit-learn>=1.3.0              # ML models, cross-validation, preprocessing
patsy>=0.5.3                     # Formula specification for statistical models

# -----------------------------------------------------------------------------
# BAYESIAN INFERENCE & PROBABILISTIC PROGRAMMING
# Core packages for MCMC, posterior computation, and model diagnostics
# -----------------------------------------------------------------------------
# PyMC Ecosystem (Primary for course)
pymc>=5.10.0                     # Probabilistic programming, NUTS sampler
arviz>=0.17.0                    # Bayesian visualization, diagnostics (R-hat, ESS)
pytensor>=2.18.0                 # Tensor computation backend for PyMC

# Stan Ecosystem (Alternative, industry standard)
cmdstanpy>=1.2.0                 # Python interface to CmdStan
# Note: Requires separate CmdStan installation:
#   python -m cmdstanpy.install_cmdstan

# JAX Ecosystem (GPU-accelerated Bayesian inference)
jax>=0.4.20                      # Autodiff, JIT compilation
jaxlib>=0.4.20                   # JAX backend
numpyro>=0.13.0                  # Probabilistic programming on JAX
blackjax>=1.0.0                  # MCMC samplers in JAX

# Additional Bayesian Tools
bambi>=0.13.0                    # Bayesian model building interface (like brms)
preliz>=0.3.0                    # Prior elicitation tools

# -----------------------------------------------------------------------------
# MACHINE LEARNING & DEEP LEARNING
# Neural networks, embeddings, pre-trained models
# -----------------------------------------------------------------------------
# PyTorch Ecosystem
torch>=2.1.0                     # Deep learning framework
torchvision>=0.16.0              # Vision models and transforms

# TensorFlow Ecosystem (Optional - for TensorFlow Probability)
# tensorflow>=2.14.0             # Deep learning framework (uncomment if needed)
# tensorflow-probability>=0.22.0 # Probabilistic layers, distributions

# Hugging Face Ecosystem (LLM Integration)
transformers>=4.35.0             # Pre-trained models (BERT, GPT, etc.)
tokenizers>=0.15.0               # Fast tokenization
datasets>=2.15.0                 # Dataset loading and processing
sentence-transformers>=2.2.0     # Sentence embeddings for similarity/RAG
accelerate>=0.25.0               # Training acceleration

# Vector Databases (RAG)
faiss-cpu>=1.7.4                 # Efficient similarity search
chromadb>=0.4.0                  # Vector database for embeddings

# -----------------------------------------------------------------------------
# LLM APIs & INTEGRATION
# External LLM service integration
# -----------------------------------------------------------------------------
openai>=1.3.0                    # OpenAI API (GPT models)
anthropic>=0.8.0                 # Anthropic API (Claude models)
tiktoken>=0.5.0                  # OpenAI tokenizer
langchain>=0.1.0                 # LLM application framework
langchain-community>=0.0.10      # LangChain community integrations

# -----------------------------------------------------------------------------
# VISUALIZATION
# Publication-quality figures and interactive plots
# -----------------------------------------------------------------------------
matplotlib>=3.8.0                # Core plotting library
seaborn>=0.13.0                  # Statistical visualization
plotly>=5.18.0                   # Interactive plots
bokeh>=3.3.0                     # Interactive visualization (alternative)
networkx>=3.2                    # Graph/network visualization

# -----------------------------------------------------------------------------
# JUPYTER ECOSYSTEM
# Interactive computing environment
# -----------------------------------------------------------------------------
jupyter>=1.0.0                   # Jupyter metapackage
jupyterlab>=4.0.0                # Modern Jupyter interface
notebook>=7.0.0                  # Classic notebook interface
ipywidgets>=8.1.0                # Interactive widgets
nbformat>=5.9.0                  # Notebook format handling
nbconvert>=7.11.0                # Notebook conversion (PDF, HTML)

# -----------------------------------------------------------------------------
# DOCUMENTATION (Sphinx)
# Course materials generation
# -----------------------------------------------------------------------------
sphinx>=7.2.0                    # Documentation generator
sphinx-rtd-theme>=2.0.0          # Read the Docs theme
myst-parser>=2.0.0               # Markdown support for Sphinx
sphinx-copybutton>=0.5.2         # Copy button for code blocks
sphinx-design>=0.5.0             # Design elements (cards, tabs)
sphinxcontrib-bibtex>=2.6.0      # Bibliography support
nbsphinx>=0.9.3                  # Jupyter notebook integration

# -----------------------------------------------------------------------------
# UTILITIES & DEVELOPMENT
# Helper libraries for course workflow
# -----------------------------------------------------------------------------
tqdm>=4.66.0                     # Progress bars
joblib>=1.3.0                    # Parallel computing, caching
dill>=0.3.7                      # Extended pickling
cloudpickle>=3.0.0               # Serialization for distributed computing
pyyaml>=6.0.1                    # YAML parsing (configs)
python-dotenv>=1.0.0             # Environment variable management
requests>=2.31.0                 # HTTP requests
httpx>=0.25.0                    # Async HTTP client

# Code Quality
black>=23.11.0                   # Code formatter
isort>=5.12.0                    # Import sorting
flake8>=6.1.0                    # Linting
mypy>=1.7.0                      # Type checking
pytest>=7.4.0                    # Testing framework
pytest-cov>=4.1.0                # Coverage reporting

# Data Formats
openpyxl>=3.1.2                  # Excel file support
xlrd>=2.0.1                      # Legacy Excel support
pyarrow>=14.0.0                  # Parquet/Arrow support
h5py>=3.10.0                     # HDF5 file support

# -----------------------------------------------------------------------------
# OPTIONAL: GPU ACCELERATION
# Uncomment if CUDA-enabled GPU is available
# -----------------------------------------------------------------------------
# cupy>=12.0.0                   # GPU-accelerated NumPy
#                                # NOTE(review): the plain `cupy` distribution builds from
#                                # source; prebuilt wheels are published under CUDA-specific
#                                # names such as `cupy-cuda12x` - confirm which one is wanted.
# jax[cuda12]                    # JAX with CUDA support (the CUDA extras are defined on
#                                # `jax`, not on `jaxlib`)
#
# PyTorch with CUDA support: `torch` defines no `cuda` extra. Select a CUDA
# build through PyTorch's wheel index instead, e.g. by adding this option line
# (pick the index that matches the installed CUDA version):
# --extra-index-url https://download.pytorch.org/whl/cu121

# -----------------------------------------------------------------------------
# OPTIONAL: ADDITIONAL BAYESIAN/MCMC TOOLS
# Specialized tools for advanced Bayesian analysis
# -----------------------------------------------------------------------------
# corner>=2.2.2                  # Corner plots for posteriors
# emcee>=3.1.4                   # Ensemble MCMC sampler
# dynesty>=2.1.3                 # Nested sampling
# ultranest>=3.5.0               # Robust nested sampling
# bilby>=2.1.0                   # Bayesian inference library (physics)

# =============================================================================
# PLATFORM-SPECIFIC NOTES
# =============================================================================
#
# macOS (Apple Silicon):
#   - PyMC/PyTensor may require: conda install -c conda-forge pymc
#   - JAX for Apple Silicon: pip install jax-metal
#
# Windows:
#   - CmdStan requires: python -m cmdstanpy.install_cmdstan
#   - Some JAX features may be limited
#
# Linux (Recommended for full compatibility):
#   - All packages should install normally
#   - For GPU: ensure CUDA toolkit is installed
#
# =============================================================================
# VERSION COMPATIBILITY MATRIX
# =============================================================================
#
# Python: 3.10, 3.11, or 3.12 (recommended: 3.11)
#
# Core Stack Compatibility:
#   numpy 1.24+ / scipy 1.11+ / pandas 2.0+  : Tested compatible
#   pymc 5.10+ / arviz 0.17+                 : Requires pytensor 2.18+
#   jax 0.4.20+ / numpyro 0.13+              : Tested compatible
#
# =============================================================================