J-Editorial Metrics Analysis

This notebook analyzes the J-Editorial framework implementation on this website by:

- Loading frontmatter data from all markdown files
- Calculating Layer 2 extrinsic dimensions
- Generating visualizations of content health and quality

# Import required libraries
import os
import yaml
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from pathlib import Path
from datetime import datetime
import subprocess

1. Data Loading

Parse frontmatter from all markdown files in the j-editorial directory.

def parse_frontmatter(file_path):
    """Extract the YAML frontmatter mapping from a markdown file.

    Frontmatter is the text between a leading ``---`` and the next ``---``.

    Args:
        file_path: Path (string or path-like) of the markdown file to read.

    Returns:
        A ``(frontmatter, error)`` tuple. On success ``frontmatter`` is the
        parsed ``dict`` and ``error`` is ``None``. On any failure
        ``frontmatter`` is ``None`` and ``error`` is a short human-readable
        reason. Failures are reported through the tuple rather than raised,
        so one bad file does not stop a batch scan.
    """
    try:
        # 'utf-8-sig' decodes plain UTF-8 identically but also drops a leading
        # byte-order mark, which would otherwise hide the opening '---'.
        with open(file_path, 'r', encoding='utf-8-sig') as f:
            content = f.read()
    except Exception as e:
        return None, f"Error reading file: {e}"

    if not content.startswith('---'):
        return None, "No frontmatter found"

    # Expect: '' | frontmatter | body. Fewer parts means no closing '---'.
    parts = content.split('---', 2)
    if len(parts) < 3:
        return None, "No frontmatter found"

    try:
        frontmatter = yaml.safe_load(parts[1])
    except yaml.YAMLError as e:
        return None, f"YAML parsing error: {e}"
    except Exception as e:
        # Keep the best-effort contract for any other failure raised while
        # building Python values, but do not mislabel it as a read error.
        return None, f"Error parsing frontmatter: {e}"

    # Empty frontmatter parses to None and a bare scalar/list parses to a
    # non-dict; callers index the result by key, so only a mapping is valid.
    if not isinstance(frontmatter, dict):
        return None, "Frontmatter is empty or not a key/value mapping"

    return frontmatter, None

# Find all markdown files in the j-editorial directory.
# NOTE(review): '../..' is expected to resolve to the j-editorial root when the
# notebook is run from its own directory — adjust if the notebook moves.
base_dir = Path('../..')
j_editorial_dir = base_dir
md_files = list(j_editorial_dir.glob('*.md'))

print(f"Found {len(md_files)} markdown files in {j_editorial_dir}\n")

# Columns that later cells index directly; they are guaranteed to exist on `df`.
expected_columns = ['title', 'refinement', 'origin', 'form', 'audience', 'stubs', 'file_name']

# Parse frontmatter
documents = []
skipped_files = []

for file_path in md_files:
    fm, error = parse_frontmatter(file_path)

    # Decide whether the file is usable; `reason` stays None when it is.
    if error:
        reason = error
    elif not isinstance(fm, dict) or 'refinement' not in fm:
        reason = "Missing 'refinement' property"
    elif isinstance(fm['refinement'], bool) or not isinstance(fm['refinement'], (int, float)):
        # A quoted or textual value would crash the ':.2f' format below and
        # every numeric comparison in later cells, so skip it with a message.
        reason = f"Non-numeric 'refinement' value: {fm['refinement']!r}"
    else:
        reason = None

    if reason is None:
        fm['file_name'] = file_path.stem
        fm['title'] = fm.get('title', file_path.stem)
        documents.append(fm)
        print(f"✓ {file_path.name}: refinement={fm['refinement']:.2f}")
    else:
        skipped_files.append((file_path.name, reason))
        print(f"⚠ {file_path.name}: {reason}")

# Create DataFrame
if documents:
    df = pd.DataFrame(documents)
    # Append any expected column that no document supplied (filled with NaN)
    # so later cells can index 'stubs', 'audience', etc. without a KeyError.
    missing_columns = [col for col in expected_columns if col not in df.columns]
    df = df.reindex(columns=[*df.columns, *missing_columns])
    print(f"\n✓ Successfully loaded {len(df)} documents with J-Editorial metadata")
    print(f"⚠ Skipped {len(skipped_files)} files\n")
    # A bare expression nested inside `if` is not rendered by the notebook,
    # so print the preview explicitly.
    print(df[['title', 'refinement', 'origin', 'form', 'audience']].head())
else:
    print("\n⚠ No documents found with complete J-Editorial metadata!")
    print("\nTo use this notebook, add the following frontmatter to your markdown files:")
    print("""---
title: "Your Document Title"
refinement: 0.85
origin: research
form: stable
audience: public
stubs: []
---""")
    # Create empty DataFrame with expected columns
    df = pd.DataFrame(columns=expected_columns)

Found 4 markdown files in ../..

✓ layer-3.md: refinement=0.85
✓ layer-2.md: refinement=0.85
✓ layer-1.md: refinement=0.75
✓ index.md: refinement=0.85

✓ Successfully loaded 4 documents with J-Editorial metadata
⚠ Skipped 0 files

2. Calculate Layer 2 Dimensions

Implement the extrinsic dimension calculations from the framework.

def calculate_health(row):
    """Classify a document's health from its refinement score and stub count.

    Args:
        row: Mapping-like record (dict or DataFrame row) with a 'refinement'
            value and an optional 'stubs' entry. 'stubs' only counts when it
            is a list; anything else is treated as zero stubs.

    Returns:
        One of 'excellent', 'good', 'fair', 'poor' or 'critical'.
    """
    score = row['refinement']
    stub_entries = row.get('stubs')
    open_stubs = len(stub_entries) if isinstance(stub_entries, list) else 0

    # (minimum refinement, maximum stubs allowed, label), best tier first.
    tiers = (
        (0.85, 0, 'excellent'),
        (0.70, 1, 'good'),
        (0.50, 3, 'fair'),
    )
    for min_score, max_stubs, label in tiers:
        if score >= min_score and open_stubs <= max_stubs:
            return label

    # Below the tiered bands only the score matters.
    return 'poor' if score >= 0.30 else 'critical'

def calculate_usefulness(row):
    """Rate usefulness by comparing refinement with the audience's quality gate.

    Args:
        row: Mapping-like record (dict or DataFrame row) with a 'refinement'
            value and an optional 'audience' entry. A missing, unknown or
            non-string audience is held to the 0.80 public gate.

    Returns:
        'ready' when refinement is at least 0.10 above the gate, 'usable'
        when it meets the gate, 'developing' when it is at most 0.20 below,
        and 'insufficient' otherwise.
    """
    gates = {
        'personal': 0.30,
        'internal': 0.60,
        'public': 0.80,
        'published': 0.90
    }

    audience = row.get('audience', 'public')
    # Only look up string audiences: a list-valued audience is unhashable and
    # would raise TypeError instead of falling back to the default gate.
    gate = gates.get(audience, 0.80) if isinstance(audience, str) else 0.80

    # Round away binary floating-point noise before comparing to thresholds:
    # 0.90 - 0.80 evaluates to 0.09999999999999998 and 0.60 - 0.80 to
    # -0.20000000000000007, which would misclassify scores sitting exactly
    # on a boundary.
    delta = round(row['refinement'] - gate, 9)

    if delta >= 0.10:
        return 'ready'
    elif delta >= 0:
        return 'usable'
    elif delta >= -0.20:
        return 'developing'
    else:
        return 'insufficient'

def calculate_stage(row):
    """Determine the workflow stage of a document.

    Args:
        row: Mapping-like record (dict or DataFrame row) with optional
            'form', 'audience' and 'stubs' entries and, unless the document
            is archived or published, a 'refinement' value.

    Returns:
        'archived' or 'published' when the metadata says so explicitly,
        otherwise 'polish', 'review', 'develop' or 'capture' according to
        the refinement score (and, for 'polish', the absence of stubs).
    """
    # Explicit lifecycle markers take precedence over any score-based stage.
    if row.get('form') == 'archived':
        return 'archived'
    if row.get('audience') == 'published':
        return 'published'

    score = row['refinement']
    stub_entries = row.get('stubs')
    open_stubs = len(stub_entries) if isinstance(stub_entries, list) else 0

    # 'polish' is the only stage that also requires a clean stub list.
    if score >= 0.80 and open_stubs == 0:
        return 'polish'

    # Remaining stages depend on the score alone, highest floor first.
    for floor, stage in ((0.70, 'review'), (0.50, 'develop')):
        if score >= floor:
            return stage
    return 'capture'

# Apply calculations: one derived label per document row
df['health'] = df.apply(calculate_health, axis=1)
df['usefulness'] = df.apply(calculate_usefulness, axis=1)
df['stage'] = df.apply(calculate_stage, axis=1)

# Count stubs per document. The row-level functions above tolerate a missing
# 'stubs' entry, so do the same here instead of raising KeyError when no
# document declares any stubs.
if 'stubs' in df.columns:
    df['stubs_count'] = df['stubs'].apply(lambda x: len(x) if isinstance(x, list) else 0)
else:
    df['stubs_count'] = 0

# Display results
df[['title', 'refinement', 'health', 'usefulness', 'stage']]

3. Health Distribution Visualization

# Health distribution interactive pie chart
health_counts = df['health'].value_counts()
colors = {
    'excellent': '#2ecc71',
    'good': '#3498db',
    'fair': '#f39c12',
    'poor': '#e74c3c',
    'critical': '#95a5a6',
}

# One slice colour per health label, in the same order as the counts;
# labels without a configured colour fall back to grey.
slice_colors = [colors.get(label, '#95a5a6') for label in health_counts.index]

fig = go.Figure()
fig.add_trace(go.Pie(
    labels=health_counts.index,
    values=health_counts.values,
    marker={'colors': slice_colors},
    textposition='inside',
    textinfo='label+percent',
    hovertemplate='<b>%{label}</b><br>Count: %{value}<br>Percentage: %{percent}<extra></extra>',
))

fig.update_layout(
    title={'text': 'Content Health Distribution', 'font': {'size': 20, 'family': 'Arial Black'}},
    height=500,
    showlegend=True,
    # Vertical legend placed just outside the right edge, centred vertically.
    legend={'orientation': 'v', 'yanchor': 'middle', 'y': 0.5, 'xanchor': 'left', 'x': 1.05},
)

# Text summary printed alongside the chart
print(f"Total documents: {len(df)}")
print(f"Average refinement: {df['refinement'].mean():.2f}")
print(f"Health distribution:\n{health_counts}")

fig

Total documents: 4
Average refinement: 0.83
Health distribution:
health
excellent    3
good         1
Name: count, dtype: int64

# Interactive refinement histogram
mean_val = df['refinement'].mean()

# Histogram of refinement scores
fig = go.Figure(go.Histogram(
    x=df['refinement'],
    nbinsx=20,
    opacity=0.7,
    marker={'color': '#3498db', 'line': {'color': 'black', 'width': 1}},
    name='Distribution',
    hovertemplate='Refinement: %{x:.2f}<br>Count: %{y}<extra></extra>',
))

# Dashed reference lines: the mean first, then the public quality gate.
reference_lines = (
    (mean_val, 'red', f'Mean: {mean_val:.2f}'),
    (0.80, 'green', 'Public Quality Gate (0.80)'),
)
for line_x, line_color, line_label in reference_lines:
    fig.add_vline(
        x=line_x,
        line_dash='dash',
        line_color=line_color,
        line_width=2,
        annotation_text=line_label,
        annotation_position='top',
    )

fig.update_layout(
    title={'text': 'Refinement Score Distribution', 'font': {'size': 20, 'family': 'Arial Black'}},
    xaxis_title='Refinement Score',
    yaxis_title='Number of Documents',
    height=500,
    showlegend=False,
    # Pin the axis to the full 0-1 score range regardless of the data.
    xaxis={'range': [0, 1]},
    hovermode='x unified',
)

fig

5. Priority Matrix

Visualize documents by health and stubs to identify attention priorities.

# Interactive priority matrix scatter plot
# Numeric rank per health label (higher is healthier), stored on the frame.
health_order = {
    'excellent': 5,
    'good': 4,
    'fair': 3,
    'poor': 2,
    'critical': 1,
}
df['health_score'] = df['health'].map(health_order)

health_colors = {
    'excellent': '#2ecc71',
    'good': '#3498db',
    'fair': '#f39c12',
    'poor': '#e74c3c',
    'critical': '#95a5a6',
}

# Refinement on x, stub count on y, one labelled point per document.
fig = px.scatter(
    df,
    x='refinement',
    y='stubs_count',
    color='health',
    size='health_score',
    hover_data={'refinement': ':.2f', 'stubs_count': True, 'title': True, 'health': True},
    text='file_name',
    color_discrete_map=health_colors,
    title='Content Priority Matrix',
)

# NOTE(review): the constant marker size set here appears to replace the
# per-point sizes requested via size='health_score' above — confirm which
# of the two is intended.
fig.update_traces(
    marker={'size': 15, 'line': {'width': 2, 'color': 'black'}, 'opacity': 0.7},
    textposition='top center',
    textfont={'size': 10},
)

fig.update_layout(
    title={'text': 'Content Priority Matrix', 'font': {'size': 20, 'family': 'Arial Black'}},
    xaxis_title='Refinement Score',
    yaxis_title='Number of Stubs',
    height=600,
    hovermode='closest',
    legend={'title': 'Health Status'},
)

fig

6. Summary Statistics

# Summary statistics
print("=" * 60)
print("J-EDITORIAL METRICS SUMMARY")
print("=" * 60)
print(f"\nTotal Documents: {len(df)}")
print(f"Average Refinement: {df['refinement'].mean():.2f}")
print(f"\nHealth Distribution:")
print(health_counts)
print(f"\nQuality Gate Compliance:")

# Each externally visible audience is held to its own gate. These values match
# the gates used by calculate_usefulness, so a 'published' document is checked
# against 0.90 rather than the lower public threshold.
audience_gates = {'public': 0.80, 'published': 0.90}
public_docs = df[df['audience'].isin(['public', 'published'])]
required_gate = public_docs['audience'].map(audience_gates)

# A document passes when it meets its gate and has no outstanding stubs.
meets_gate = (public_docs['refinement'] >= required_gate) & (public_docs['stubs_count'] == 0)
passing = int(meets_gate.sum())

# With no public/published documents there is nothing out of compliance.
compliance_rate = (passing / len(public_docs) * 100) if len(public_docs) > 0 else 100

print(f"  Public/Published docs: {len(public_docs)}")
print(f"  Passing quality gates: {passing}")
print(f"  Compliance rate: {compliance_rate:.0f}%")
print("\n" + "=" * 60)

============================================================
J-EDITORIAL METRICS SUMMARY
============================================================

Total Documents: 4
Average Refinement: 0.83

Health Distribution:
health
excellent    3
good         1
Name: count, dtype: int64

Quality Gate Compliance:
  Public/Published docs: 4
  Passing quality gates: 3
  Compliance rate: 75%

============================================================