howto cleanup | rotstift :)

This commit is contained in:
lovebird 2025-03-24 21:04:37 +01:00
parent 77ca825a11
commit ec082dc6ab
10 changed files with 108 additions and 122 deletions

View File

@ -23,7 +23,7 @@
"format": "unix-time"
}
],
"default": "2025-03-24T17:35:42.252Z"
"default": "2025-03-24T19:56:52.552Z"
},
"description": {
"type": "string",

File diff suppressed because one or more lines are too long

View File

@ -1,9 +1,9 @@
{
"model": "google/gemini-exp-1206:free",
"model": "gpt-4o-mini",
"messages": [
{
"role": "user",
"content": "Return a list of useful references (only with links), as Markdown, grouped : Articles, Books, Papers, Youtube, Opensource Designs, ... Dont comment ! : Injection mold design, broom hanger mold, plastic injection, mold making, CAD mold design, DIY injection mold, injection molding process, mold design criteria, plastic product design, custom mold design\n"
"content": "use a formal tone\nspell check the text, fix any errors\nremove emojis\nremove personal preferences or biases\nshorten text if possible but preserve personality\nremove references to preciousplastic, bazar and Discord\nremove any brain/green washing as well suggestions not related to the craft\nContext: howto tutorials, for makers\nConvert units, from metric to imperial and vice versa (in braces)\ndont comment just return as Markdown : Here you will find the 3D model and blueprints to create the wall peg mold!"
},
{
"role": "user",

View File

@ -85,7 +85,8 @@ export enum ModerationFlags {
export enum ContextFlags {
None = 0,
MakerTutorials = 1
MakerTutorials = 1,
Units = 2
}
export enum FormatFlags {
@ -110,7 +111,8 @@ const MODERATION_INSTRUCTIONS = [
]
const CONTEXT_INSTRUCTIONS = [
{ mask: ContextFlags.MakerTutorials, text: "Context: howto tutorials, for makers" }
{ mask: ContextFlags.MakerTutorials, text: "Context: howto tutorials, for makers" },
{ mask: ContextFlags.Units, text: "Convert units, from metric to imperial and vice versa (in braces)" }
]
const FORMAT_INSTRUCTIONS = [
@ -123,7 +125,7 @@ const DEFAULT_CONTENT = ContentFlags.SpellCheck |
ContentFlags.RemovePersonalPrefs |
ContentFlags.Shorten
const DEFAULT_MODERATION = ModerationFlags.MafiaFilter | ModerationFlags.Deprogramming
const DEFAULT_CONTEXT = ContextFlags.MakerTutorials
const DEFAULT_CONTEXT = ContextFlags.MakerTutorials | ContextFlags.Units
const DEFAULT_FORMAT = FormatFlags.Markdown
export const buildPrompt = (

View File

@ -22,13 +22,14 @@ export const I18N_ASSET_PATH = "${SRC_DIR}/${SRC_NAME}-${DST_LANG}${SRC_EXT}"
export const HOWTO_GLOB = '**/config.json'
export const FILES_WEB = 'https://files.polymech.io/files/machines/howtos/'
export const HOWTO_FILTER_LLM = false
export const HOWTO_FILTER_LLM = true
export const HOWTO_ANNOTATIONS = true
export const HOWTO_ANNOTATIONS_CACHE = true
export const HOWTO_COMPLETE_RESOURCES = false
export const HOWTO_MIGRATION = () => path.resolve(resolve("./data/last.json"))
export const HOWTO_ROOT_INTERN = () => path.resolve(resolve("./public/resources/howtos"))
export const HOWTO_ROOT = () => path.resolve(resolve("${OSR_ROOT}/osr-machines/howtos"))
export const HOWTO_FILES_ABS = (id) => `${HOWTO_ROOT()}/${id}`
/**
 * Builds the public URL under FILES_WEB for the howto with the given id.
 *
 * FILES_WEB is declared with a trailing slash, so any trailing slashes are
 * trimmed before joining; otherwise the result would be `.../howtos//<id>`.
 */
export const HOWTO_FILES_WEB = (id: string) => `${FILES_WEB.replace(/\/+$/, '')}/${id}`

View File

@ -141,7 +141,7 @@ export function cacheAnnotationExample(annotation: IAnnotation) {
async function example() {
// Example howto data (partial)
const exampleHowto: IHowto = {\n slug: "cut-out-shapes-out-of-plastic-sheets-with-a-cnc-",
const exampleHowto: IHowto = { slug: "cut-out-shapes-out-of-plastic-sheets-with-a-cnc-",
title: "Cut out shapes out of plastic sheets with a CNC ",
description: "In this how to, I will show you our process to cut HDPE Sheets using a X-Carve CNC.\n\nHere is the full video in spanish with subtitles https://www.youtube.com/watch?v=4LrrFz802To ",
steps: [

0
src/model/download.ts Normal file
View File

68
src/model/filters.ts Normal file
View File

@ -0,0 +1,68 @@
export * from './howto-model.js'
import { filter as language } from "@/base/kbot.js";
import { HOWTO_FILTER_LLM, HOWTO_ROOT } from "config/config.js";
/** Returns `<HOWTO_ROOT()>/<slug>` for a store item, reading the slug from `item.data.slug`. */
export const item_path = (item: any) => {
  const root = HOWTO_ROOT()
  return `${root}/${item.data.slug}`
}
// Not referenced anywhere in the visible code.
const blacklist_ = [];

// Creator ids; the howto loader drops entries whose `_createdBy` is listed here.
export const blacklist = [
  'precious-plastic',
  'fair-enough',
  'mad-plastic-labs',
  'the-flipflopi',
  'easymoulds',
  'plasticpreneur',
  'sustainable-design-studio',
];

// Domains whose URLs renderLinks() replaces with "[Link Removed]".
export const urlBlacklist = [
  "thenounproject.com",
  "preciousplastic.com",
];

// Words that filterBannedPhrases() replaces with "[filtered]".
export const bannedWords = [
  "wizard",
  "magic2",
];

// Word -> replacement pairs applied (whole word, case-insensitive) by replaceWords().
export const wordReplaceMap: Record<string, string> = {
  "Router": "CNC Router",
  "laptop stand": "laptoppie",
  "Car": "tufftuff",
}
/**
 * Produces a compact display form of a URL: host without a leading "www.",
 * followed by the percent-decoded path without a single trailing slash.
 * Query string and fragment are not included.
 *
 * If the input cannot be parsed as a URL, or its path cannot be decoded,
 * the input is returned unchanged.
 */
export const shortenUrl = (url: string): string => {
  try {
    const parsed = new URL(url);
    const host = parsed.hostname.startsWith('www.')
      ? parsed.hostname.slice('www.'.length)
      : parsed.hostname;
    const route = parsed.pathname.endsWith('/')
      ? parsed.pathname.slice(0, -1)
      : parsed.pathname;
    return host + decodeURIComponent(route);
  } catch {
    // Unparseable URL or malformed percent-encoding: hand the input back as-is.
    return url;
  }
};
/**
 * Turns bare http(s) URLs in `text` into HTML anchors, or replaces them with
 * "[Link Removed]" when the URL contains a blacklisted domain.
 *
 * - Trailing sentence punctuation, and a closing parenthesis that has no
 *   matching "(" inside the URL (as in "(see https://…)" or a Markdown link),
 *   is kept outside the link instead of becoming part of the href.
 * - The visible label is HTML-escaped: shortenUrl() percent-decodes the path,
 *   so a URL such as `…/%3Cimg%20onerror=…%3E` would otherwise inject markup.
 *   The href is left untouched; the match pattern already excludes `"` and `<`.
 */
export const renderLinks = (text: string): string => {
  const escapeHtml = (value: string): string =>
    value
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;")
      .replace(/'/g, "&#39;");

  // Splits a raw match into [url, trailing text that should stay outside the link].
  const splitTrailing = (raw: string): [string, string] => {
    // Never trim into the scheme: keep at least one character after "://".
    const minEnd = raw.indexOf("://") + 4;
    let end = raw.length;
    while (end > minEnd) {
      const last = raw[end - 1];
      if (".,;:!?".includes(last)) {
        end--;
        continue;
      }
      if (last === ")") {
        const head = raw.slice(0, end);
        const opens = head.split("(").length - 1;
        const closes = head.split(")").length - 1;
        // Only drop a ")" that is not balanced by a "(" inside the URL itself.
        if (closes > opens) {
          end--;
          continue;
        }
      }
      break;
    }
    return [raw.slice(0, end), raw.slice(end)];
  };

  return text.replace(/https?:\/\/[^\s<"]+/gi, (match) => {
    const [url, trailing] = splitTrailing(match);
    const isBlacklisted = urlBlacklist.some((domain) =>
      url.toLowerCase().includes(domain.toLowerCase()),
    );
    const rendered = isBlacklisted
      ? "[Link Removed]"
      : `<a class="text-orange-600 underline" href="${url}" target="_blank" rel="noopener noreferrer">${escapeHtml(shortenUrl(url))}</a>`;
    return rendered + trailing;
  });
};
/**
 * Replaces every whole-word, case-insensitive occurrence of a banned word
 * with "[filtered]".
 *
 * Entries are matched literally: regex metacharacters in a banned word are
 * escaped rather than being interpreted as (or breaking) a pattern.
 */
export const filterBannedPhrases = (text: string): string =>
  bannedWords.reduce((acc, word) => {
    const literal = word.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    return acc.replace(new RegExp(`\\b${literal}\\b`, "gi"), "[filtered]");
  }, text);
/**
 * Applies every entry of wordReplaceMap to `text`, matching whole words
 * case-insensitively.
 *
 * - An occurrence that already sits inside the replacement text is left
 *   alone, so "CNC Router" is not expanded again to "CNC CNC Router" and the
 *   filter can safely run more than once over the same text.
 * - The key is matched literally (regex metacharacters are escaped) and the
 *   replacement is inserted literally (`$&`, `$1`, … are not expanded).
 */
export const replaceWords = (text: string): string =>
  Object.entries(wordReplaceMap).reduce((acc, [word, replacement]) => {
    const literal = word.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    const expanded = replacement.toLowerCase();
    // Position of the word inside its own replacement, or -1 if it is not contained.
    const wordAt = expanded.indexOf(word.toLowerCase());
    return acc.replace(
      new RegExp(`\\b${literal}\\b`, "gi"),
      (match: string, offset: number, whole: string) => {
        if (wordAt >= 0) {
          const start = offset - wordAt;
          const alreadyExpanded =
            start >= 0 &&
            whole.slice(start, start + replacement.length).toLowerCase() === expanded;
          if (alreadyExpanded) {
            return match;
          }
        }
        return replacement;
      },
    );
  }, text);
// Last pipeline stage: the imported `language` filter when HOWTO_FILTER_LLM is
// enabled, otherwise a pass-through that returns the text unchanged.
const llmFilter = HOWTO_FILTER_LLM ? language : (text: string) => text;

// Ordered filter pipeline; applyFilters() runs the entries first to last.
export const filters = [renderLinks, filterBannedPhrases, replaceWords, llmFilter];
/**
 * Runs `text` through every entry of `filters`, in order, awaiting each
 * filter's result before passing it to the next one.
 *
 * @param text Input text.
 * @returns The output of the last filter.
 */
export async function applyFilters(text: string): Promise<string> {
  return filters.reduce<Promise<string>>(
    async (pending, step) => step(await pending),
    Promise.resolve(text),
  );
}

View File

@ -1,4 +1,3 @@
// Existing IHowto interface with added version field
export const ITEM_TYPE = 'howto'
export interface ICoverImage {
name: string
@ -10,7 +9,6 @@ export interface ICoverImage {
timeCreated: string
contentType: string
}
export interface IStep {
title: string
text: string

View File

@ -1,5 +1,7 @@
import * as path from 'path'
import { findUp } from 'find-up'
import pMap from 'p-map'
import { sanitizeFilename } from "@polymech/fs/utils"
import { execFileSync, execFile } from "child_process";
import { sync as read } from '@polymech/fs/read'
@ -9,46 +11,28 @@ import { sync as rm } from '@polymech/fs/remove'
import type { Loader, LoaderContext } from 'astro/loaders'
export * from './howto-model.js'
export * from './filters.js'
import { filter as language } from "@/base/kbot.js";
import { IHowto, IImage, ITag, ITEM_TYPE } from './howto-model.js';
import type { IAnnotation } from "./annotation.js"
import { blacklist } from './filters.js'
import { download } from './download.js'
import {
HOWTO_FILES_WEB,
HOWTO_FILES_ABS,
HOWTO_FILTER_LLM,
HOWTO_COMPLETE_RESOURCES
} from "config/config.js";
import {
HOWTO_COMPLETE_RESOURCES,
default_image,
HOWTO_ROOT,
HOWTO_GLOB
} from 'config/config.js'
import { env } from '@/base/index.js'
import { slugify } from "@/base/strings.js"
import { got } from 'got'
import pMap from 'p-map'
import { HOWTO_MIGRATION } from '@/app/config.js'
import { createWriteStream } from 'fs';
HOWTO_GLOB,
HOWTO_MIGRATION
} from "config/config.js";
import { logger } from '@/base/index.js'
//export const load = () => get(`${HOWTO_ROOT()}/${HOWTO_GLOB}`, HOWTO_ROOT(), ITEM_TYPE)
export const item_path = (item: any) => `${HOWTO_ROOT()}/${item.data.slug}`
const blacklist_ = [];
const blacklist = ['precious-plastic', 'fair-enough', 'mad-plastic-labs', 'the-flipflopi', 'easymoulds', 'plasticpreneur', 'sustainable-design-studio'];
const download = async (url, outputPath) => {
const stream = createWriteStream(outputPath);
got.stream(url).pipe(stream);
return new Promise((resolve, reject) => {
stream.on('finish', () => resolve(`File downloaded: ${outputPath}`));
stream.on('error', reject);
});
}
export const asset_local_abs = async (item: IHowto, asset: IImage) => {
const sanitizedFilename = sanitizeFilename(asset.name)
const asset_path = path.join(HOWTO_ROOT(), item.slug, sanitizedFilename)
@ -67,63 +51,60 @@ export const downloadFiles = async (dst: string, howto: IHowto) => {
try {
await download(i.downloadUrl, asset_path)
} catch (e) {
console.error('error download step file', e);
logger.error('error download step file', e);
}
} else {
const parts = path.parse(asset_path);
const zipout = path.join(asset_root, 'files')
if (parts.ext === '.rar' || parts.ext === '.zip') {
console.info(`Extracting RAR file ${i.name} to ${zipout}`);
logger.info(`Extracting RAR file ${i.name} to ${zipout}`);
try {
if (!exists(asset_path)) {
console.error(`File does not exist: ${asset_path}`);
logger.error(`File does not exist: ${asset_path}`);
return;
}
if (exists(zipout)) {
//console.info(`Removing existing directory: ${zipout}`);
//logger.info(`Removing existing directory: ${zipout}`);
// rm(zipout);
console.info(`already extracted: ${zipout}`)
logger.info(`already extracted: ${zipout}`)
return
}
return new Promise<boolean>((resolve, reject) => {
const timeout = setTimeout(() => {
child.kill()
console.error("Extraction timed out after 15 seconds")
logger.error("Extraction timed out after 15 seconds")
resolve(false);
}, 15000);
const child = execFile("7z", ["e", "" + asset_path, "-o" + zipout], (err, stdout) => {
clearTimeout(timeout)
if (err) {
console.error(err.message);
logger.error(err.message);
return resolve(false)
}
console.info(`Extracted rar to ${zipout}`)
logger.info(`Extracted rar to ${zipout}`)
return resolve(true)
});
});
} catch (e) {
console.error("Error during RAR extraction", e);
logger.error("Error during RAR extraction", e);
}
}
}
}, { concurrency: 1 })
}
/**
 * Resolves the site-relative path of a howto asset.
 *
 * The file is looked up under `<HOWTO_ROOT()>/<slug>/<sanitized, lower-cased name>`.
 * When it exists, `/resources/howtos/<slug>/<file>` is returned. Otherwise the
 * asset is downloaded to that location and the default image source is
 * returned for this call.
 */
export const asset_local_rel = async (item: IHowto, asset: IImage) => {
  const fileName = sanitizeFilename(asset.name).toLowerCase()
  const localPath = path.join(HOWTO_ROOT(), item.slug, fileName)
  if (!exists(localPath)) {
    console.log(`Downloading ${asset.downloadUrl} to ${localPath}`)
    await download(asset.downloadUrl, localPath)
    return default_image().src
  }
  return `/resources/howtos/${item.slug}/${fileName}`
}
export const howtos = async () => {
export const raw = async () => {
const src = HOWTO_MIGRATION()
const data = read(src, 'json') as any;
let howtos = data.v3_howtos as any[]
@ -141,7 +122,7 @@ export const howtos = async () => {
label: 'uncategorized'
}
})
howtos = howtos.filter((h:IHowto) => {
howtos = howtos.filter((h: IHowto) => {
return h.steps.length > 0 && !blacklist.includes(h._createdBy);
});
return howtos
@ -162,9 +143,9 @@ export const defaults = async (data: any, cwd: string, root: string) => {
} catch (error) {
}
return data;
};
}
const onItem = async (store: any, ctx: LoaderContext) => {
const onStoreItem = async (store: any, ctx: LoaderContext) => {
const item = store.data.item as IHowto
item.steps = item.steps || []
item.cover_image && (item.cover_image.src = await asset_local_rel(item, item.cover_image))
@ -180,17 +161,14 @@ const onItem = async (store: any, ctx: LoaderContext) => {
});
return step;
}, { concurrency: 1 })
item.steps.forEach((step) => {
step.images = step.images.filter((image) => asset_local_abs(item, image))
})
item.files = await downloadFiles(item.slug, item)
return item
}
export function loader(): Loader {
const load = async ({
config,
logger,
@ -200,7 +178,7 @@ export function loader(): Loader {
generateDigest }: LoaderContext) => {
store.clear()
let items = await howtos()
let items = await raw()
for (const item of items) {
const id = item.slug
const data = {
@ -219,7 +197,7 @@ export function loader(): Loader {
data: data
}
await onItem(storeItem, {
await onStoreItem(storeItem, {
logger,
watcher,
parseData,
@ -236,64 +214,3 @@ export function loader(): Loader {
load
};
}
////////////////////////////////
//
// Filters
const urlBlacklist = ["thenounproject.com", "preciousplastic.com"];
const bannedWords = ["wizard", "magic2"];
const wordReplaceMap: Record<string, string> = {
Router: "CNC Router",
"laptop stand": "laptoppie",
};
export const shortenUrl = (url: string): string => {
try {
const { hostname, pathname } = new URL(url);
const cleanHost = hostname.replace(/^www\./, '');
const cleanPath = pathname.replace(/\/$/, ''); // remove trailing slash
return `${cleanHost}${decodeURIComponent(cleanPath)}`;
} catch {
// If invalid URL, return as-is
return url;
}
};
// Turns URLs into clickable links, unless blacklisted
export const renderLinks = (text: string): string =>
text.replace(/https?:\/\/[^\s<"]+/gi, (url) => {
const isBlacklisted = urlBlacklist.some((domain) =>
url.toLowerCase().includes(domain.toLowerCase()),
);
return isBlacklisted
? "[Link Removed]"
: `<a class="text-orange-600 underline" href="${url}" target="_blank" rel="noopener noreferrer">${shortenUrl(url)}</a>`;
});
export const filterBannedPhrases = (text: string): string =>
bannedWords.reduce(
(acc, word) => acc.replace(new RegExp(`\\b${word}\\b`, "gi"), "[filtered]"),
text,
);
export const replaceWords = (text: string): string =>
Object.entries(wordReplaceMap).reduce(
(acc, [word, replacement]) =>
acc.replace(new RegExp(`\\b${word}\\b`, "gi"), replacement),
text,
);
export const filters = [
renderLinks,
filterBannedPhrases,
replaceWords,
HOWTO_FILTER_LLM ? language : (text: string) => text,
];
export async function applyFilters(text: string): Promise<string> {
let filtered = text;
for (const filterFn of filters) {
filtered = await filterFn(filtered);
}
return filtered;
}