File: ci.yml

package info (click to toggle)
nltk 3.9.3-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 9,452 kB
  • sloc: python: 79,931; makefile: 180; sh: 82; xml: 17
file content (157 lines) | stat: -rw-r--r-- 5,500 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
# Continuous-integration workflow: lint with pre-commit, smoke-test a minimal
# NLTK data download, then run the test suite on the OS/Python matrix.
name: ci-workflow

on: [push, pull_request, workflow_dispatch]

# Least privilege: the jobs below only check out the code, set up Python and
# use the Actions cache, so the GITHUB_TOKEN needs read access and nothing more.
permissions:
  contents: read

# Locations of the third-party tools, exported to every step of every job.
# Everything lives under THIRD_PARTY_DIR, which the test job caches as a whole.
env:
  THIRD_PARTY_DIR: ${{ github.workspace }}/third
  # CORENLP and CORENLP_MODELS deliberately point at the same directory.
  CORENLP: ${{ github.workspace }}/third/stanford-corenlp
  CORENLP_MODELS: ${{ github.workspace }}/third/stanford-corenlp
  STANFORD_PARSER: ${{ github.workspace }}/third/stanford-parser
  # STANFORD_MODELS and STANFORD_POSTAGGER share the postagger directory.
  STANFORD_MODELS: ${{ github.workspace }}/third/stanford-postagger
  STANFORD_POSTAGGER: ${{ github.workspace }}/third/stanford-postagger
  SENNA: ${{ github.workspace }}/third/senna
  PROVER9: ${{ github.workspace }}/third/prover9/bin
  MEGAM: ${{ github.workspace }}/third/megam
  MALT_PARSER: ${{ github.workspace }}/third/maltparser

jobs:
  # Lint gate: runs every configured pre-commit hook against the whole tree.
  # The test job lists this job in its `needs`, so a failure here blocks it.
  pre-commit:
    name: pre-commit
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v6

      # A single fixed interpreter is enough for linting.
      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: '3.13'

      - name: Install pre-commit
        run: |
          pip install pre-commit

      # --all-files checks the full repository, not only the changed files.
      - name: Run pre-commit hooks
        run: |
          pre-commit run --all-files

  # Smoke test: with only `regex` installed from PyPI, import nltk from the
  # checkout and download a single corpus (wordnet) on each operating system.
  minimal_download_test:
    name: Minimal NLTK Download Test
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest, macos-latest, windows-latest]
    steps:
      - uses: actions/checkout@v6

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: "3.14"

      - name: Install regex
        run: pip install regex

      # Export NLTK_DATA to all following steps. "$GITHUB_ENV" is quoted so the
      # redirect target is never word-split or globbed (ShellCheck SC2086).
      - name: Set NLTK_DATA environment variable
        shell: bash
        run: echo "NLTK_DATA=${{ github.workspace }}/nltk_data" >> "$GITHUB_ENV"

      # Diagnostic only: confirms the variable reached the next step.
      - name: Show NLTK_DATA in shell
        shell: bash
        run: |
          echo "NLTK_DATA in shell: $NLTK_DATA"

      # Creates the NLTK_DATA directory if needed and downloads wordnet into it.
      - name: Ensure minimal NLTK data for cache
        shell: bash
        run: |
          python -c "import os, nltk; d = os.environ['NLTK_DATA']; import pathlib; pathlib.Path(d).mkdir(parents=True, exist_ok=True); nltk.download('wordnet', download_dir=d)"

  # Full test run: one job per (Python version, OS) pair, started only after
  # the pre-commit and minimal_download_test jobs have succeeded.
  test:
    name: Python ${{ matrix.python-version }} on ${{ matrix.os }}
    needs: [pre-commit, minimal_download_test]
    strategy:
      matrix:
        python-version: ['3.10', '3.11', '3.12', '3.13', '3.14', '3.14t']
        os: [ubuntu-latest, macos-latest, windows-latest]
        exclude:
          - os: windows-latest
            python-version: '3.14t'  # scikit-learn issue on Py3.14t on Windows
      # Let the remaining matrix cells finish even if one of them fails.
      fail-fast: false
    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v6

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.python-version }}

      # Export NLTK_DATA to all following steps. "$GITHUB_ENV" is quoted so the
      # redirect target is never word-split or globbed (ShellCheck SC2086).
      - name: Set NLTK_DATA environment variable
        shell: bash
        run: echo "NLTK_DATA=${{ github.workspace }}/nltk_data" >> "$GITHUB_ENV"

      # `python -m pip` instead of the bare `pip` launcher: on Windows pip
      # refuses to upgrade itself when it is started through pip.exe.
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install --upgrade --requirement requirements-ci.txt

      # Runs before the cache step below, so the nltk_data directory already
      # exists (and contains wordnet) when the cache action is invoked.
      - name: Ensure minimal NLTK data for cache
        shell: bash
        run: |
          python -c "import os, nltk; d = os.environ['NLTK_DATA']; import pathlib; pathlib.Path(d).mkdir(parents=True, exist_ok=True); nltk.download('wordnet', download_dir=d)"

      # Diagnostic only: prints the paths as seen by the shell and by Python.
      - name: Show NLTK_DATA and workspace
        shell: bash
        run: |
          echo "GITHUB_WORKSPACE is: $GITHUB_WORKSPACE"
          echo "NLTK_DATA is: $NLTK_DATA"
          python -c "import os; print('Python sees GITHUB_WORKSPACE:', os.environ.get('GITHUB_WORKSPACE')); print('Python sees NLTK_DATA:', os.environ.get('NLTK_DATA'))"

      # Diagnostic only: `|| echo` keeps the step green if the dir is missing.
      - name: List contents of NLTK data dir
        shell: bash
        run: ls -lR "${{ github.workspace }}/nltk_data" || echo "nltk_data not found"

      # One cache entry per runner OS; bump the _v1 suffix to invalidate it.
      - name: Cache nltk data
        uses: actions/cache@v5
        id: nltk-data-cache
        with:
          path: ${{ github.workspace }}/nltk_data
          key: nltk_data_${{ runner.os }}_v1

      # Only when the cache step did not report an exact key hit: fetch the
      # complete NLTK data collection into NLTK_DATA.
      - name: Download nltk data on cache miss
        if: steps.nltk-data-cache.outputs.cache-hit != 'true'
        shell: bash
        run: |
          python -c "import os; import nltk; from pathlib import Path; path = Path(os.environ['NLTK_DATA']); path.mkdir(parents=True, exist_ok=True); nltk.download('all', download_dir=path)"

      # --- THIRD PARTY TOOLS CACHE SECTION ---
      # Pinned to bash like the other shell steps: `mkdir -p` is POSIX syntax,
      # and the default shell on the Windows runners is PowerShell.
      - name: Ensure third-party directory exists
        shell: bash
        run: mkdir -p "${{ env.THIRD_PARTY_DIR }}"

      # The key embeds the hash of the download script, so editing the script
      # produces a new key and therefore a fresh download.
      - name: Cache third-party tools
        uses: actions/cache@v5
        id: third-party-cache
        with:
          path: ${{ env.THIRD_PARTY_DIR }}
          key: third_${{ runner.os }}_${{ hashFiles('tools/github_actions/third-party.sh') }}_v1

      - name: List contents of third-party dir before download
        shell: bash
        run: ls -lR "${{ env.THIRD_PARTY_DIR }}" || echo "third-party dir not found"

      # Only when the cache step did not report an exact key hit: make the
      # download script executable and run it.
      - name: Download third-party data on cache miss
        if: steps.third-party-cache.outputs.cache-hit != 'true'
        shell: bash
        run: |
          chmod +x ./tools/github_actions/third-party.sh
          ./tools/github_actions/third-party.sh

      - name: List contents of third-party dir after download/cache
        shell: bash
        run: ls -lR "${{ env.THIRD_PARTY_DIR }}" || echo "third-party dir not found"

      # Diagnostic only: shows where nltk will look for its data.
      - name: Print NLTK data search paths
        shell: bash
        run: python -c "import nltk; print('NLTK data search paths:', nltk.data.path)"

      # Runs the doctests of the nltk package in parallel (one worker per
      # CPU); -rsx adds skipped and xfailed tests to the summary.
      - name: Run pytest
        shell: bash
        run: |
          pytest --numprocesses auto -rsx --doctest-modules nltk