This notebook analyzes the J-Editorial framework implementation on this website by:

- Loading frontmatter data from all markdown files
- Calculating Layer 2 extrinsic dimensions
- Generating visualizations of content health and quality
# Import required libraries
import os
import yaml
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from pathlib import Path
from datetime import datetime
import subprocess
1. Data Loading
Parse frontmatter from all markdown files in the j-editorial directory.
def parse_frontmatter(file_path):
    """Extract YAML frontmatter from a markdown file."""
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
        if content.startswith('---'):
            parts = content.split('---', 2)
            if len(parts) >= 3:
                try:
                    frontmatter = yaml.safe_load(parts[1])
                    return frontmatter, None
                except yaml.YAMLError as e:
                    return None, f"YAML parsing error: {e}"
        return None, "No frontmatter found"
    except Exception as e:
        return None, f"Error reading file: {e}"
# Find all markdown files in j-editorial directory
# The notebook lives two levels below the j-editorial/ content root
base_dir = Path('../..')
j_editorial_dir = base_dir
md_files = list(j_editorial_dir.glob('*.md'))
print(f"Found {len(md_files)} markdown files in {j_editorial_dir}\n")
# Parse frontmatter
documents = []
skipped_files = []
for file_path in md_files:
    fm, error = parse_frontmatter(file_path)
    if fm and 'refinement' in fm:
        fm['file_name'] = file_path.stem
        fm['title'] = fm.get('title', file_path.stem)
        documents.append(fm)
        print(f"✓ {file_path.name}: refinement={fm['refinement']:.2f}")
    else:
        reason = error if error else "Missing 'refinement' property"
        skipped_files.append((file_path.name, reason))
        print(f"⚠ {file_path.name}: {reason}")
# Create DataFrame
if len(documents) > 0:
    df = pd.DataFrame(documents)
    print(f"\n✓ Successfully loaded {len(df)} documents with J-Editorial metadata")
    print(f"⚠ Skipped {len(skipped_files)} files\n")
    # A bare expression inside an if-block does not render in a notebook,
    # so wrap the preview in display()
    display(df[['title', 'refinement', 'origin', 'form', 'audience']].head())
else:
    print("\n⚠ No documents found with complete J-Editorial metadata!")
    print("\nTo use this notebook, add the following frontmatter to your markdown files:")
    print("""---
title: "Your Document Title"
refinement: 0.85
origin: research
form: stable
audience: public
stubs: []
---""")
    # Create an empty DataFrame with the expected columns
    df = pd.DataFrame(columns=['title', 'refinement', 'origin', 'form', 'audience', 'stubs', 'file_name'])
Found 4 markdown files in ../..
✓ layer-3.md: refinement=0.80
✓ layer-2.md: refinement=0.80
✓ layer-1.md: refinement=0.80
✓ index.md: refinement=0.85
✓ Successfully loaded 4 documents with J-Editorial metadata
⚠ Skipped 0 files
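If content ever moves into subdirectories, the discovery step needs a recursive glob. A minimal sketch, assuming a nested layout that the current site does not use:

# Recursive variant: Path.rglob walks nested directories, where glob
# only scans the top level of j_editorial_dir.
md_files = sorted(j_editorial_dir.rglob('*.md'))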
2. Calculate Layer 2 Dimensions
Implement the extrinsic dimension calculations from the framework.
def calculate_health(row):
    """Calculate health dimension based on refinement and stubs."""
    r = row['refinement']
    stubs = len(row.get('stubs', [])) if isinstance(row.get('stubs'), list) else 0
    if r >= 0.85 and stubs == 0:
        return 'excellent'
    elif r >= 0.70 and stubs <= 1:
        return 'good'
    elif r >= 0.50 and stubs <= 3:
        return 'fair'
    elif r >= 0.30:
        return 'poor'
    else:
        return 'critical'
def calculate_usefulness(row):
    """Calculate usefulness based on refinement vs. the audience quality gate."""
    gates = {
        'personal': 0.30,
        'internal': 0.60,
        'public': 0.80,
        'published': 0.90
    }
    audience = row.get('audience', 'public')
    gate = gates.get(audience, 0.80)
    delta = row['refinement'] - gate
    if delta >= 0.10:
        return 'ready'
    elif delta >= 0:
        return 'usable'
    elif delta >= -0.20:
        return 'developing'
    else:
        return 'insufficient'
def calculate_stage(row):
    """Calculate workflow stage."""
    if row.get('form') == 'archived':
        return 'archived'
    if row.get('audience') == 'published':
        return 'published'
    r = row['refinement']
    stubs = len(row.get('stubs', [])) if isinstance(row.get('stubs'), list) else 0
    if r >= 0.80 and stubs == 0:
        return 'polish'
    elif r >= 0.70:
        return 'review'
    elif r >= 0.50:
        return 'develop'
    else:
        return 'capture'
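# Quick sanity check on a single hypothetical row before touching the
# DataFrame (values are illustrative, not taken from the site's content):
sample = {'refinement': 0.80, 'stubs': [], 'audience': 'public', 'form': 'stable'}
assert calculate_health(sample) == 'good'        # 0.70 <= 0.80 < 0.85, no stubs
assert calculate_usefulness(sample) == 'usable'  # delta = 0.80 - 0.80 = 0.00
assert calculate_stage(sample) == 'polish'       # r >= 0.80 and zero stubs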
# Apply calculations
df['health'] = df.apply(calculate_health, axis=1)
df['usefulness'] = df.apply(calculate_usefulness, axis=1)
df['stage'] = df.apply(calculate_stage, axis=1)
df['stubs_count'] = df['stubs'].apply(lambda x: len(x) if isinstance(x, list) else 0)
# Display results
df[['title', 'refinement', 'health', 'usefulness', 'stage']]
3. Health Distribution Visualization
# Health distribution interactive pie chart
health_counts = df['health'].value_counts()
colors = {'excellent': '#2ecc71', 'good': '#3498db', 'fair': '#f39c12',
          'poor': '#e74c3c', 'critical': '#95a5a6'}
fig = go.Figure(data=[go.Pie(
    labels=health_counts.index,
    values=health_counts.values,
    marker=dict(colors=[colors.get(h, '#95a5a6') for h in health_counts.index]),
    textposition='inside',
    textinfo='label+percent',
    hovertemplate='<b>%{label}</b><br>Count: %{value}<br>Percentage: %{percent}<extra></extra>'
)])
fig.update_layout(
    title=dict(text='Content Health Distribution', font=dict(size=20, family='Arial Black')),
    height=500,
    showlegend=True,
    legend=dict(orientation='v', yanchor='middle', y=0.5, xanchor='left', x=1.05)
)
print(f"Total documents: {len(df)}")
print(f"Average refinement: {df['refinement'].mean():.2f}")
print(f"Health distribution:\n{health_counts}")
fig
4. Refinement Score Distribution
# Interactive refinement histogram
fig = go.Figure()
# Add histogram
fig.add_trace(go.Histogram(
    x=df['refinement'],
    nbinsx=20,
    marker_color='#3498db',
    opacity=0.7,
    marker_line_color='black',
    marker_line_width=1,
    name='Distribution',
    hovertemplate='Refinement: %{x:.2f}<br>Count: %{y}<extra></extra>'
))
mean_val = df['refinement'].mean()
# Add mean line
fig.add_vline(
    x=mean_val,
    line_dash='dash',
    line_color='red',
    line_width=2,
    annotation_text=f'Mean: {mean_val:.2f}',
    annotation_position='top'
)
# Add quality gate line
fig.add_vline(
    x=0.80,
    line_dash='dash',
    line_color='green',
    line_width=2,
    annotation_text='Public Quality Gate (0.80)',
    annotation_position='top'
)
fig.update_layout(
    title=dict(text='Refinement Score Distribution', font=dict(size=20, family='Arial Black')),
    xaxis_title='Refinement Score',
    yaxis_title='Number of Documents',
    height=500,
    showlegend=False,
    xaxis=dict(range=[0, 1]),
    hovermode='x unified'
)
fig
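To reuse any of these charts outside the notebook, a Plotly figure can be written out as a standalone HTML file. A minimal sketch, with a placeholder file name:

# Export the interactive figure as self-contained HTML; the file name
# here is a placeholder, not part of the original run.
fig.write_html('refinement-distribution.html', include_plotlyjs='cdn')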
5. Priority Matrix
Visualize documents by health and stubs to identify attention priorities.
# Interactive priority matrix scatter plot
health_order = {'excellent': 5, 'good': 4, 'fair': 3, 'poor': 2, 'critical': 1}
df['health_score'] = df['health'].map(health_order)
health_colors = {'excellent': '#2ecc71', 'good': '#3498db', 'fair': '#f39c12',
                 'poor': '#e74c3c', 'critical': '#95a5a6'}
fig = px.scatter(
    df,
    x='refinement',
    y='stubs_count',
    color='health',
    size='health_score',
    hover_data={'refinement': ':.2f', 'stubs_count': True, 'title': True, 'health': True},
    text='file_name',
    color_discrete_map=health_colors,
    title='Content Priority Matrix'
)
fig.update_traces(
    marker=dict(size=15, line=dict(width=2, color='black'), opacity=0.7),
    textposition='top center',
    textfont=dict(size=10)
)
fig.update_layout(
    title=dict(text='Content Priority Matrix', font=dict(size=20, family='Arial Black')),
    xaxis_title='Refinement Score',
    yaxis_title='Number of Stubs',
    height=600,
    hovermode='closest',
    legend=dict(title='Health Status')
)
fig
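One optional refinement, sketched here rather than part of the run above: guide lines that split the matrix into attention quadrants, reusing the 0.80 public gate from Section 2 (the one-stub boundary is an illustrative choice):

# Quadrant guides: vertical line at the public quality gate, horizontal
# line at a one-stub threshold (the latter is an assumption).
fig.add_vline(x=0.80, line_dash='dot', line_color='gray')
fig.add_hline(y=1, line_dash='dot', line_color='gray')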
6. Summary Statistics
# Summary statistics
print("=" * 60)
print("J-EDITORIAL METRICS SUMMARY")
print("=" * 60)
print(f"\nTotal Documents: {len(df)}")
print(f"Average Refinement: {df['refinement'].mean():.2f}")
print(f"\nHealth Distribution:")
print(health_counts)
print(f"\nQuality Gate Compliance:")
public_docs = df[df['audience'].isin(['public', 'published'])]
passing = len(public_docs[(public_docs['refinement'] >= 0.80) & (public_docs['stubs_count'] == 0)])
print(f" Public/Published docs: {len(public_docs)}")
print(f" Passing quality gates: {passing}")
print(f" Compliance rate: {(passing/len(public_docs)*100) if len(public_docs) > 0 else 100:.0f}%")
print("\n" + "=" * 60)============================================================
J-EDITORIAL METRICS SUMMARY
============================================================
Total Documents: 4
Average Refinement: 0.81
Health Distribution:
health
good 3
excellent 1
Name: count, dtype: int64
Quality Gate Compliance:
Public/Published docs: 4
Passing quality gates: 4
Compliance rate: 100%
============================================================
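A natural follow-up, sketched against the same DataFrame: compliance measured per audience gate instead of the single 0.80 threshold (the gate mapping repeats the one in calculate_usefulness):

# Per-audience compliance: each document is checked against its own gate.
gates = {'personal': 0.30, 'internal': 0.60, 'public': 0.80, 'published': 0.90}
df['gate'] = df['audience'].map(gates).fillna(0.80)
df['passes_gate'] = (df['refinement'] >= df['gate']) & (df['stubs_count'] == 0)
print(df.groupby('audience')['passes_gate'].mean().mul(100).round(0))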