Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 39 additions & 1 deletion .github/workflows/data-processing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,25 @@ jobs:
- name: Install Python dependencies
run: python3 -m pip install -r ./requirements.txt



#========================================
# Setup Node.js for bibliography processing
#========================================
- name: Setup Node.js
uses: actions/setup-node@v4
with:
node-version: '18'
cache: 'npm'
cache-dependency-path: bibtex_to_apa/package-lock.json

Copy link

Copilot AI Jan 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

actions/setup-node caching is configured with cache: 'npm' but without cache-dependency-path, so it will default to a root package-lock.json and likely miss caching for bibtex_to_apa/. Set cache-dependency-path: bibtex_to_apa/package-lock.json (and include any other lockfiles you want cached).

Suggested change
cache-dependency-path: bibtex_to_apa/package-lock.json

Copilot uses AI. Check for mistakes.
#========================================
# Install Node.js dependencies for bibliography processing
#========================================
- name: Install Node.js dependencies
run: |
cd bibtex_to_apa
npm install

#========================================
# Process contributor data using Tenzing script
#========================================
Expand Down Expand Up @@ -202,6 +220,15 @@ jobs:
fi
done

#========================================
# Generate APA lookup from bibliography
#========================================
- name: Generate APA lookup
      continue-on-error: true  # Best-effort: glossary generation should still run even if APA lookup generation fails
run: |
cd bibtex_to_apa
node bibtex_to_apa.js -o '../content/glossary/apa_lookup.json'

#========================================
# Process and generate glossary files
#========================================
Expand All @@ -211,6 +238,17 @@ jobs:
run: python3 content/glossary/_create_glossaries.py
# Execute the glossary script that generates glossary markdown files

- name: Check for missing references
if: always()
run: |
if [ -f "content/glossary/missing_references.txt" ]; then
echo "Missing references found:"
cat content/glossary/missing_references.txt
# Optionally fail the workflow or create an issue
else
echo "All references resolved successfully"
fi

#========================================
# Download Google Analytics data and validate
#========================================
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -268,3 +268,6 @@ gha-creds-*.json

# Tenzing failure reports (temporary files for CI)
scripts/forrt_contribs/tenzing_failures.json

# Bibtex to APA converter output
bibtex_to_apa/node_modules/
91 changes: 91 additions & 0 deletions bibtex_to_apa/bibtex_to_apa.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
// citation-js core plus the plugins required to parse BibTeX input and
// render CSL-formatted (APA template) output.
const { Cite } = require('@citation-js/core');
require('@citation-js/plugin-bibtex');
require('@citation-js/plugin-csl');
const fs = require('fs');

// Default BibTeX source: a shared Google Doc (fetched via its text export URL).
const DEFAULT_INPUT = 'https://docs.google.com/document/d/1-KKsOYZWJ3LdgdO2b2uJsOG2AmUDaQBNqWVVTY2W4W8/edit?tab=t.0';
// Default destination for the generated citation-key -> APA-string JSON lookup.
const DEFAULT_OUTPUT = 'apa_lookup.json';

/**
 * Resolve a BibTeX source to its raw text content.
 *
 * Accepts a local file path, a Google Doc URL (converted to the plain-text
 * export endpoint), or any other http(s) URL fetched as-is.
 *
 * @param {string} input - File path or URL of the BibTeX source.
 * @returns {Promise<string>} The BibTeX text.
 * @throws {Error} If a Google Doc URL has no document id, or a fetch fails.
 */
async function fetchBibtex(input) {
  // Anything that is not an http(s) URL is treated as a path on disk.
  if (!input.startsWith('http')) {
    return fs.readFileSync(input, 'utf-8');
  }

  let target = input;
  let isGoogleDoc = false;
  if (input.includes('docs.google.com')) {
    const idMatch = /\/d\/([a-zA-Z0-9_-]+)/.exec(input);
    if (idMatch === null) throw new Error('Invalid Google Doc URL');
    target = `https://docs.google.com/document/d/${idMatch[1]}/export?format=txt`;
    isGoogleDoc = true;
  }

  const response = await fetch(target);
  if (!response.ok) throw new Error(`Failed to fetch: ${response.status}`);
  const body = await response.text();

  // Google Docs text export embeds bracketed comment markers (e.g. "[a]");
  // strip them so they don't corrupt the BibTeX. Other URLs pass through.
  return isGoogleDoc ? body.replace(/\[[a-z]+\]/gi, '') : body;
}

/**
 * Pull a URL out of a CSL-JSON entry.
 *
 * Prefers the entry's own URL field; otherwise scans the free-text `note`
 * field for the first http(s) link.
 *
 * @param {object} entry - A CSL-JSON reference entry.
 * @returns {?string} The URL, or null when none is present.
 */
function extractUrl(entry) {
  if (entry.URL) {
    return entry.URL;
  }
  const noteLink = entry.note?.match(/https?:\/\/[^\s]+/);
  return noteLink ? noteLink[0] : null;
}

/**
 * Convert BibTeX text into a { citationKey: apaReferenceString } lookup.
 *
 * Each entry is formatted with the APA CSL template. When `includeUrl` is
 * true, a non-DOI URL found on the entry (see extractUrl) is appended as
 * "Retrieved from <url>" unless the formatted reference already contains it.
 *
 * @param {string} bibtexContent - Raw BibTeX source.
 * @param {boolean} [includeUrl=true] - Whether to append URLs to references.
 * @returns {Object<string, string>} Map of citation key to APA string.
 */
function bibtexToApaJson(bibtexContent, includeUrl = true) {
  const parsed = new Cite(bibtexContent);
  const lookup = {};

  for (const entry of parsed.data) {
    const key = entry.id || entry['citation-key'];

    let formatted = new Cite(entry)
      .format('bibliography', { format: 'text', template: 'apa', lang: 'en-US' })
      .trim();

    if (includeUrl) {
      const url = extractUrl(entry);
      // Skip DOI links (already rendered by the APA template) and URLs the
      // formatted string already contains.
      if (url && !url.includes('doi.org') && !formatted.includes(url)) {
        const endsWithUrl = /https?:\/\/[^\s]+$/.test(formatted);
        formatted = endsWithUrl
          ? `${formatted} Retrieved from ${url}`
          : formatted.replace(/\.?$/, `. Retrieved from ${url}`);
      }
    }

    lookup[key] = formatted;
  }

  return lookup;
}

/**
 * CLI entry point: parse flags, fetch the BibTeX source, convert it to an
 * APA lookup table, and write it as pretty-printed JSON.
 *
 * Flags:
 *   -i, --input   BibTeX source (URL or local file). Default: shared Google Doc.
 *   -o, --output  Destination JSON file. Default: apa_lookup.json.
 *   --no-url      Do not append non-DOI URLs to formatted references.
 *   -h, --help    Print usage and exit.
 */
async function main() {
  const args = process.argv.slice(2);
  let input = DEFAULT_INPUT;
  let output = DEFAULT_OUTPUT;
  let includeUrl = true;

  // Fail loudly when a flag is given without its value (e.g. `-i` as the
  // last argument) instead of letting `undefined` crash fetchBibtex later.
  const requireValue = (flag, value) => {
    if (value === undefined) {
      console.error(`Missing value for ${flag}`);
      process.exit(1);
    }
    return value;
  };

  for (let i = 0; i < args.length; i++) {
    if (args[i] === '-i' || args[i] === '--input') input = requireValue(args[i], args[++i]);
    else if (args[i] === '-o' || args[i] === '--output') output = requireValue(args[i], args[++i]);
    else if (args[i] === '--no-url') includeUrl = false;
    else if (args[i] === '-h' || args[i] === '--help') {
      console.log(`Usage: node bibtex_to_apa.js [-i INPUT] [-o OUTPUT] [--no-url]
Options:
  -i, --input    Input BibTeX (URL or file). Default: Google Doc
  -o, --output   Output JSON file. Default: apa_lookup.json
  --no-url       Don't append URLs to references`);
      process.exit(0);
    }
  }

  const bibtex = await fetchBibtex(input);
  const apaJson = bibtexToApaJson(bibtex, includeUrl);
  fs.writeFileSync(output, JSON.stringify(apaJson, null, 2));
  console.log(`Wrote ${Object.keys(apaJson).length} references to ${output}`);
}

// Exit non-zero on failure so CI can detect errors; `catch(console.error)`
// alone would log the error but still exit 0.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
224 changes: 224 additions & 0 deletions bibtex_to_apa/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading