{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "from tqdm.auto import tqdm\n",
    "import feedparser\n",
    "from pathlib import Path\n",
    "import shutil\n",
    "from plotly import express as ex\n",
    "import numpy as np\n",
    "from markdownify import markdownify\n",
    "from bs4 import BeautifulSoup\n",
    "import re\n",
    "import codecs"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Get feed"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\elie\\AppData\\Roaming\\Python\\Python310\\site-packages\\urllib3\\connectionpool.py:1045: InsecureRequestWarning: Unverified HTTPS request is being made to host 'www.provos.org'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/1.26.x/advanced-usage.html#ssl-warnings\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "b'<?xml version=\"1.0\" encoding=\"utf-8\" ?>\\n\\n<rss version=\"2.0\" \\n   xmlns:rdf=\"http://www.w3.org/1999/02'\n"
     ]
    }
   ],
   "source": [
    "# fetching posts\n",
    "# adding verify=false cause the SSL cert is busted\n",
    "r  = requests.get(\"https://www.provos.org/rss.php?all=1&version=2.0\", verify=False)\n",
    "raw_feed = r.content\n",
    "print(raw_feed[:100])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "total posts 149\n"
     ]
    }
   ],
   "source": [
    "feed = feedparser.parse(raw_feed)\n",
    "posts = feed.entries\n",
    "print(\"total posts\", len(posts))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "avg 135.08053691275168 std 101.75431107589749\n"
     ]
    }
   ],
   "source": [
    "word_count = [len(p.summary.split(\" \")) for p in posts]\n",
    "print('avg', np.average(word_count), 'std', np.std(word_count))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.plotly.v1+json": {
       "config": {
        "plotlyServerURL": "https://plot.ly"
       },
       "data": [
        {
         "alignmentgroup": "True",
         "bingroup": "x",
         "hovertemplate": "x=%{x}<br>count=%{y}<extra></extra>",
         "legendgroup": "",
         "marker": {
          "color": "#636efa",
          "pattern": {
           "shape": ""
          }
         },
         "name": "",
         "offsetgroup": "",
         "orientation": "v",
         "showlegend": false,
         "type": "histogram",
         "x": [
          428,
          313,
          45,
          101,
          322,
          71,
          514,
          143,
          200,
          120,
          44,
          38,
          44,
          26,
          72,
          87,
          102,
          49,
          46,
          59,
          99,
          98,
          44,
          96,
          93,
          212,
          98,
          143,
          113,
          263,
          185,
          295,
          117,
          151,
          138,
          91,
          17,
          40,
          65,
          151,
          163,
          187,
          210,
          161,
          252,
          76,
          48,
          103,
          44,
          114,
          179,
          95,
          50,
          182,
          266,
          60,
          127,
          55,
          18,
          79,
          70,
          393,
          52,
          150,
          274,
          204,
          214,
          168,
          289,
          227,
          125,
          189,
          163,
          123,
          222,
          94,
          177,
          296,
          109,
          51,
          76,
          445,
          331,
          98,
          170,
          105,
          1,
          73,
          1,
          162,
          127,
          158,
          213,
          344,
          46,
          236,
          73,
          101,
          74,
          78,
          144,
          402,
          42,
          75,
          47,
          82,
          173,
          535,
          274,
          88,
          95,
          92,
          56,
          79,
          131,
          56,
          196,
          54,
          315,
          21,
          80,
          34,
          56,
          31,
          81,
          145,
          79,
          58,
          42,
          80,
          59,
          122,
          42,
          75,
          274,
          117,
          77,
          211,
          91,
          76,
          109,
          92,
          58,
          114,
          54,
          222,
          52,
          265,
          70
         ],
         "xaxis": "x",
         "yaxis": "y"
        }
       ],
       "layout": {
        "barmode": "relative",
        "legend": {
         "tracegroupgap": 0
        },
        "margin": {
         "t": 60
        },
        "template": {
         "data": {
          "bar": [
           {
            "error_x": {
             "color": "#2a3f5f"
            },
            "error_y": {
             "color": "#2a3f5f"
            },
            "marker": {
             "line": {
              "color": "#E5ECF6",
              "width": 0.5
             },
             "pattern": {
              "fillmode": "overlay",
              "size": 10,
              "solidity": 0.2
             }
            },
            "type": "bar"
           }
          ],
          "barpolar": [
           {
            "marker": {
             "line": {
              "color": "#E5ECF6",
              "width": 0.5
             },
             "pattern": {
              "fillmode": "overlay",
              "size": 10,
              "solidity": 0.2
             }
            },
            "type": "barpolar"
           }
          ],
          "carpet": [
           {
            "aaxis": {
             "endlinecolor": "#2a3f5f",
             "gridcolor": "white",
             "linecolor": "white",
             "minorgridcolor": "white",
             "startlinecolor": "#2a3f5f"
            },
            "baxis": {
             "endlinecolor": "#2a3f5f",
             "gridcolor": "white",
             "linecolor": "white",
             "minorgridcolor": "white",
             "startlinecolor": "#2a3f5f"
            },
            "type": "carpet"
           }
          ],
          "choropleth": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "type": "choropleth"
           }
          ],
          "contour": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "contour"
           }
          ],
          "contourcarpet": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "type": "contourcarpet"
           }
          ],
          "heatmap": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "heatmap"
           }
          ],
          "heatmapgl": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "heatmapgl"
           }
          ],
          "histogram": [
           {
            "marker": {
             "pattern": {
              "fillmode": "overlay",
              "size": 10,
              "solidity": 0.2
             }
            },
            "type": "histogram"
           }
          ],
          "histogram2d": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "histogram2d"
           }
          ],
          "histogram2dcontour": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "histogram2dcontour"
           }
          ],
          "mesh3d": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "type": "mesh3d"
           }
          ],
          "parcoords": [
           {
            "line": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "parcoords"
           }
          ],
          "pie": [
           {
            "automargin": true,
            "type": "pie"
           }
          ],
          "scatter": [
           {
            "fillpattern": {
             "fillmode": "overlay",
             "size": 10,
             "solidity": 0.2
            },
            "type": "scatter"
           }
          ],
          "scatter3d": [
           {
            "line": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatter3d"
           }
          ],
          "scattercarpet": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattercarpet"
           }
          ],
          "scattergeo": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattergeo"
           }
          ],
          "scattergl": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattergl"
           }
          ],
          "scattermapbox": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattermapbox"
           }
          ],
          "scatterpolar": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatterpolar"
           }
          ],
          "scatterpolargl": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatterpolargl"
           }
          ],
          "scatterternary": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatterternary"
           }
          ],
          "surface": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "surface"
           }
          ],
          "table": [
           {
            "cells": {
             "fill": {
              "color": "#EBF0F8"
             },
             "line": {
              "color": "white"
             }
            },
            "header": {
             "fill": {
              "color": "#C8D4E3"
             },
             "line": {
              "color": "white"
             }
            },
            "type": "table"
           }
          ]
         },
         "layout": {
          "annotationdefaults": {
           "arrowcolor": "#2a3f5f",
           "arrowhead": 0,
           "arrowwidth": 1
          },
          "autotypenumbers": "strict",
          "coloraxis": {
           "colorbar": {
            "outlinewidth": 0,
            "ticks": ""
           }
          },
          "colorscale": {
           "diverging": [
            [
             0,
             "#8e0152"
            ],
            [
             0.1,
             "#c51b7d"
            ],
            [
             0.2,
             "#de77ae"
            ],
            [
             0.3,
             "#f1b6da"
            ],
            [
             0.4,
             "#fde0ef"
            ],
            [
             0.5,
             "#f7f7f7"
            ],
            [
             0.6,
             "#e6f5d0"
            ],
            [
             0.7,
             "#b8e186"
            ],
            [
             0.8,
             "#7fbc41"
            ],
            [
             0.9,
             "#4d9221"
            ],
            [
             1,
             "#276419"
            ]
           ],
           "sequential": [
            [
             0,
             "#0d0887"
            ],
            [
             0.1111111111111111,
             "#46039f"
            ],
            [
             0.2222222222222222,
             "#7201a8"
            ],
            [
             0.3333333333333333,
             "#9c179e"
            ],
            [
             0.4444444444444444,
             "#bd3786"
            ],
            [
             0.5555555555555556,
             "#d8576b"
            ],
            [
             0.6666666666666666,
             "#ed7953"
            ],
            [
             0.7777777777777778,
             "#fb9f3a"
            ],
            [
             0.8888888888888888,
             "#fdca26"
            ],
            [
             1,
             "#f0f921"
            ]
           ],
           "sequentialminus": [
            [
             0,
             "#0d0887"
            ],
            [
             0.1111111111111111,
             "#46039f"
            ],
            [
             0.2222222222222222,
             "#7201a8"
            ],
            [
             0.3333333333333333,
             "#9c179e"
            ],
            [
             0.4444444444444444,
             "#bd3786"
            ],
            [
             0.5555555555555556,
             "#d8576b"
            ],
            [
             0.6666666666666666,
             "#ed7953"
            ],
            [
             0.7777777777777778,
             "#fb9f3a"
            ],
            [
             0.8888888888888888,
             "#fdca26"
            ],
            [
             1,
             "#f0f921"
            ]
           ]
          },
          "colorway": [
           "#636efa",
           "#EF553B",
           "#00cc96",
           "#ab63fa",
           "#FFA15A",
           "#19d3f3",
           "#FF6692",
           "#B6E880",
           "#FF97FF",
           "#FECB52"
          ],
          "font": {
           "color": "#2a3f5f"
          },
          "geo": {
           "bgcolor": "white",
           "lakecolor": "white",
           "landcolor": "#E5ECF6",
           "showlakes": true,
           "showland": true,
           "subunitcolor": "white"
          },
          "hoverlabel": {
           "align": "left"
          },
          "hovermode": "closest",
          "mapbox": {
           "style": "light"
          },
          "paper_bgcolor": "white",
          "plot_bgcolor": "#E5ECF6",
          "polar": {
           "angularaxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           },
           "bgcolor": "#E5ECF6",
           "radialaxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           }
          },
          "scene": {
           "xaxis": {
            "backgroundcolor": "#E5ECF6",
            "gridcolor": "white",
            "gridwidth": 2,
            "linecolor": "white",
            "showbackground": true,
            "ticks": "",
            "zerolinecolor": "white"
           },
           "yaxis": {
            "backgroundcolor": "#E5ECF6",
            "gridcolor": "white",
            "gridwidth": 2,
            "linecolor": "white",
            "showbackground": true,
            "ticks": "",
            "zerolinecolor": "white"
           },
           "zaxis": {
            "backgroundcolor": "#E5ECF6",
            "gridcolor": "white",
            "gridwidth": 2,
            "linecolor": "white",
            "showbackground": true,
            "ticks": "",
            "zerolinecolor": "white"
           }
          },
          "shapedefaults": {
           "line": {
            "color": "#2a3f5f"
           }
          },
          "ternary": {
           "aaxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           },
           "baxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           },
           "bgcolor": "#E5ECF6",
           "caxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           }
          },
          "title": {
           "x": 0.05
          },
          "xaxis": {
           "automargin": true,
           "gridcolor": "white",
           "linecolor": "white",
           "ticks": "",
           "title": {
            "standoff": 15
           },
           "zerolinecolor": "white",
           "zerolinewidth": 2
          },
          "yaxis": {
           "automargin": true,
           "gridcolor": "white",
           "linecolor": "white",
           "ticks": "",
           "title": {
            "standoff": 15
           },
           "zerolinecolor": "white",
           "zerolinewidth": 2
          }
         }
        },
        "xaxis": {
         "anchor": "y",
         "domain": [
          0,
          1
         ],
         "title": {
          "text": "x"
         }
        },
        "yaxis": {
         "anchor": "x",
         "domain": [
          0,
          1
         ],
         "title": {
          "text": "count"
         }
        }
       }
      }
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# sanity check\n",
    "ex.histogram(x=word_count)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## setup data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "clean-up previous export\n"
     ]
    }
   ],
   "source": [
    "workdir = Path(\"../data/\")\n",
    "mddir = workdir / 'md'\n",
    "imgdir = workdir / 'static' / 'images'\n",
    "if workdir.exists():\n",
    "    print(\"clean-up previous export\")\n",
    "    shutil.rmtree(workdir)\n",
    "workdir.mkdir(parents=True)\n",
    "mddir.mkdir(parents=True)\n",
    "imgdir.mkdir(parents=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## parsing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_images(html, img_dir):\n",
    "    bs = BeautifulSoup(html, 'html.parser')\n",
    "    images = bs.find_all('img') #, {'src':re.compile('(.jpg|.png|.gif)')})\n",
    "    srcs = [i['src'] for i in images]\n",
    "    r = []\n",
    "    for src in srcs:\n",
    "        if not \"provos.org\" in src:\n",
    "            continue\n",
    "        img_name = src.split('/')[-1]\n",
    "        fname = img_dir / img_name\n",
    "        \n",
    "        # download if only needed\n",
    "        if not fname.exists():\n",
    "            req = requests.get(src, verify=False)\n",
    "            if req.status_code == 200:\n",
    "                with open(fname, 'wb') as f:\n",
    "                    f.write(req.content)\n",
    "        r.append({\"url\": src, \"fname\": str(img_name)})\n",
    "    return r"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dict_keys(['title', 'title_detail', 'links', 'link', 'tags', 'comments', 'wfw_comment', 'slash_comments', 'wfw_commentrss', 'authors', 'author', 'author_detail', 'content', 'summary', 'published', 'published_parsed', 'id', 'guidislink'])"
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "posts[10].keys()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "creating posts:  61%|██████    | 91/149 [00:00<00:00, 901.04it/s]C:\\Users\\elie\\AppData\\Roaming\\Python\\Python310\\site-packages\\urllib3\\connectionpool.py:1045: InsecureRequestWarning:\n",
      "\n",
      "Unverified HTTPS request is being made to host 'www.provos.org'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/1.26.x/advanced-usage.html#ssl-warnings\n",
      "\n",
      "C:\\Users\\elie\\AppData\\Roaming\\Python\\Python310\\site-packages\\urllib3\\connectionpool.py:1045: InsecureRequestWarning:\n",
      "\n",
      "Unverified HTTPS request is being made to host 'www.provos.org'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/1.26.x/advanced-usage.html#ssl-warnings\n",
      "\n",
      "C:\\Users\\elie\\AppData\\Roaming\\Python\\Python310\\site-packages\\urllib3\\connectionpool.py:1045: InsecureRequestWarning:\n",
      "\n",
      "Unverified HTTPS request is being made to host 'www.provos.org'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/1.26.x/advanced-usage.html#ssl-warnings\n",
      "\n",
      "C:\\Users\\elie\\AppData\\Roaming\\Python\\Python310\\site-packages\\urllib3\\connectionpool.py:1045: InsecureRequestWarning:\n",
      "\n",
      "Unverified HTTPS request is being made to host 'www.provos.org'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/1.26.x/advanced-usage.html#ssl-warnings\n",
      "\n",
      "C:\\Users\\elie\\AppData\\Roaming\\Python\\Python310\\site-packages\\urllib3\\connectionpool.py:1045: InsecureRequestWarning:\n",
      "\n",
      "Unverified HTTPS request is being made to host 'www.provos.org'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/1.26.x/advanced-usage.html#ssl-warnings\n",
      "\n",
      "C:\\Users\\elie\\AppData\\Roaming\\Python\\Python310\\site-packages\\urllib3\\connectionpool.py:1045: InsecureRequestWarning:\n",
      "\n",
      "Unverified HTTPS request is being made to host 'www.provos.org'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/1.26.x/advanced-usage.html#ssl-warnings\n",
      "\n",
      "C:\\Users\\elie\\AppData\\Roaming\\Python\\Python310\\site-packages\\urllib3\\connectionpool.py:1045: InsecureRequestWarning:\n",
      "\n",
      "Unverified HTTPS request is being made to host 'www.provos.org'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/1.26.x/advanced-usage.html#ssl-warnings\n",
      "\n",
      "creating posts: 100%|██████████| 149/149 [00:02<00:00, 63.35it/s]\n"
     ]
    }
   ],
   "source": [
    "for p in tqdm(posts, desc=\"creating posts\"):\n",
    "    title = p.title\n",
    "    slug = p.title.lower().replace(' ', '-').replace('--', '-')\n",
    "    slug = re.sub('[^a-z0-9-]', '', slug)\n",
    "    postpath = mddir / f\"{slug}.md\" \n",
    "    \n",
    "    date = p.published\n",
    "    if 'tags' in p:\n",
    "        tags = [t['term'] for t in p.tags]\n",
    "    else:\n",
    "        tags = []\n",
    "    content = p.summary\n",
    "\n",
    "    # dl images and get info\n",
    "    img_info = get_images(content, imgdir)\n",
    "\n",
    "    # markdown\n",
    "    md = markdownify(content) \n",
    "    md = md.replace('\\n\\n', '\\n')   \n",
    "\n",
    "    # replace with relative images\n",
    "    for i in img_info:\n",
    "        md = md.replace(i['url'], f\"/static/images/{i['fname']}\")\n",
    "\n",
    "    with codecs.open(postpath, 'w+', encoding='utf-8') as o:\n",
    "        o.write('---\\n')\n",
    "        o.write(f'layout: post\\n')\n",
    "        o.write(f'title: {title}\\n')\n",
    "        o.write(f'slug: {slug}\\n')\n",
    "        o.write(f'permalink: posts/{slug}\\n')\n",
    "        o.write(\"author: Niels Provos\\n\")\n",
    "        o.write(f'date: {date}\\n')     \n",
    "        o.write(f'tags: {\",\".join(tags)}\\n')        \n",
    "        o.write(f'published: true\\n')\n",
    "        o.write('---\\n')\n",
    "        o.write(f'# {title}\\n')\n",
    "        o.write(md)\n",
    "\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.2 (default, Feb 28 2021, 17:03:44) \n[GCC 10.2.1 20210110]"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
