J
J
Juribiyan2014-05-27 07:18:53
Character encoding
Juribiyan, 2014-05-27 07:18:53

How to batch rename files with wrong encoding in Windows?

There is a folder of files whose names are displayed as mojibake (garbled characters). An online decoder determined that the names need to be converted from CP-1251 to UTF-8. There are many ways to do this on *nix, but how can it be done on Windows?

Answer the question

In order to leave comments, you need to log in

2 answer(s)
J
Juribiyan, 2014-05-27
@Juribiyan

In the end, I had to write the program myself. I used Node.js because that is what I know best.

var glob = require("glob");
var argv = require('optimist').argv;
var encoding = require("encoding");
var fs = require('fs');

// Command-line options (all optional, parsed by optimist):
//   --ext   file extension to match (default: any extension)
//   --from  charset name passed to encoding.convert() (default: WINDOWS-1251)
//   --to    charset name passed to encoding.convert() (default: UTF-8)
//   --log   pass "on" to print a line for every successful rename
//   --dir   directory the renamed files are moved into (default: _OUTPUT)
var ext = argv.ext || '*'
  , to = argv.to || 'UTF-8'
  , from = argv.from || 'WINDOWS-1251'
  , loge = argv.log === 'on';
var outputDir = argv.dir || '_OUTPUT';

// Create the output directory synchronously so that it is guaranteed to exist
// before the first rename below is issued.
if (!fs.existsSync(outputDir)) fs.mkdirSync(outputDir);

// Match every file in the current directory with the requested extension and
// move each one into outputDir under its re-encoded name.
glob("*." + ext, function (err, files) {
  if (err) {
    console.log('ERROR: ', err);
    return;
  }
  iter(files, function (name) {
    // NOTE(review): the `encoding` package documents its signature as
    // convert(text, toCharset, fromCharset); the arguments are passed here as
    // (name, from, to). Confirm the direction against your own file names
    // before swapping anything.
    // String() makes explicit the string conversion that the concatenation
    // below would otherwise perform implicitly on the converted value.
    var newName = String(encoding.convert(name, from, to));
    fs.rename('./' + name, './' + outputDir + '/' + newName, function (err) {
      if (err) {
        console.log(err);
        return; // do not report a rename that did not happen
      }
      if (loge) console.log('Renamed from ' + name + ' to ' + newName);
    });
  });
});

/**
 * Calls `callback` once for each element of `array`, in index order.
 *
 * If `array` is not an object (for example a single string), it is treated as
 * one item: `callback` is called with it and its result is returned.
 * Otherwise nothing is returned.
 */
function iter(array, callback) {
  if (typeof array !== 'object') {
    return callback(array);
  }

  // The length is read once up front, before any callback runs.
  var count = array.length;
  var idx = 0;
  while (idx < count) {
    callback(array[idx]);
    idx += 1;
  }
}

Usage:
node <script>.js --ext=txt --from=WINDOWS-1251 --to=UTF-8 --dir=_OUTPUT --log=on
(all arguments are optional).
Note: I may have mixed up the direction of the conversion. You might need to convert from UTF-8 to WINDOWS-1251 instead; which one is "from" and which is "to" depends on your point of view. I simply followed what the decoder reported.

A
Andrey Plax, 2014-05-27
@Hereigo

Alternatively, save the code below to a text file, edit the variables, save it with a .BAT extension, and run it.
================================================================================
/*
@echo off && cls
rem Batch/C# polyglot header: cmd.exe executes the lines below, while the C#
rem compiler sees this whole section as a block comment.
rem Locate csc.exe; a later match overrides an earlier one, so the highest
rem installed framework version listed here wins.
set WinDirNet=%WinDir%\Microsoft.NET\Framework
IF EXIST "%WinDirNet%\v2.0.50727\csc.exe" set csc="%WinDirNet%\v2.0.50727\csc.exe"
IF EXIST "%WinDirNet%\v3.5\csc.exe" set csc="%WinDirNet%\v3.5\csc.exe"
IF EXIST "%WinDirNet%\v4.0.30319\csc.exe" set csc="%WinDirNet%\v4.0.30319\csc.exe"
rem Compile this very file to an .exe next to it, run it, then delete the .exe.
%csc% /nologo /out:"%~0.exe" %0
"%~0.exe"
del "%~0.exe"
exit
*/
// SIMPLE ENCODING CONVERSION OF TEXT FILES IN C#:
namespace ConsoleApplication123
{
using System;
using System.IO;
using System.Text;

/// <summary>
/// Console tool that re-encodes every matching text file in a folder and writes
/// the converted copies into an "_OUTPUT" subfolder. The source files are only read.
/// </summary>
/// <remarks>
/// Deliberately limited to C# 2.0 syntax (no var, no lambdas) because the batch
/// header of this file may compile it with the .NET 2.0 csc.exe.
/// </remarks>
class Program
{
    /// <summary>
    /// Entry point. The settings are edited in source; <paramref name="args"/> is not used.
    /// </summary>
    /// <param name="args">Command-line arguments (ignored).</param>
    static void Main(string[] args)
    {
        // SETTINGS:
        // Folder containing the files to convert:
        string folder = @"C:\temp";
        // Search pattern for the files to convert (wildcard plus extension, with the dot):
        string fileExtention = "*.txt";
        // Encoding the source files are read with:
        Encoding encodingIn = Encoding.GetEncoding("windows-1251");
        // Encoding the converted files are written with:
        Encoding encodingOut = Encoding.GetEncoding("utf-8");

        // PROGRAM EXECUTION:
        if (!Directory.Exists(folder))
        {
            // Report a missing folder instead of crashing with an unhandled exception.
            Console.WriteLine("Folder not found: " + folder);
        }
        else
        {
            ConvertFolder(folder, fileExtention, encodingIn, encodingOut);
        }

        System.Console.WriteLine();
        System.Console.WriteLine();
        System.Console.WriteLine("Completed.");
        System.Console.ReadKey();
    }

    /// <summary>
    /// Converts every file in <paramref name="folder"/> that matches
    /// <paramref name="searchPattern"/>. A failure on one file is printed and
    /// the remaining files are still processed.
    /// </summary>
    private static void ConvertFolder(string folder, string searchPattern, Encoding encodingIn, Encoding encodingOut)
    {
        FileInfo[] files = new DirectoryInfo(folder).GetFiles(searchPattern);
        if (files.Length < 1)
        {
            Console.WriteLine("There are no files in the folder " + folder + " " + searchPattern);
            return;
        }

        string outputDir = Path.Combine(folder, "_OUTPUT");
        for (int i = 0; i < files.Length; i++)
        {
            try
            {
                // Kept inside the try block so that a failure to create the
                // output folder is reported like any other per-file error.
                if (!Directory.Exists(outputDir))
                {
                    Directory.CreateDirectory(outputDir);
                }

                ConvertFile(files[i], Path.Combine(outputDir, files[i].Name), encodingIn, encodingOut);
            }
            catch (Exception ex)
            {
                // Name the file that failed. Indexing the command-line args here
                // would itself throw when no arguments were passed.
                Console.WriteLine("OBJECT : " + files[i].FullName);
                Console.WriteLine();
                Console.WriteLine("ERROR : " + ex.ToString());
                Console.ReadKey();
            }
        }
    }

    /// <summary>
    /// Reads <paramref name="source"/> with <paramref name="encodingIn"/> and writes
    /// it line by line to a new file at <paramref name="targetPath"/> using
    /// <paramref name="encodingOut"/>.
    /// </summary>
    /// <exception cref="IOException">
    /// The target file already exists (it is opened with FileMode.CreateNew, so
    /// existing output is never overwritten) or another I/O error occurred.
    /// </exception>
    private static void ConvertFile(FileInfo source, string targetPath, Encoding encodingIn, Encoding encodingOut)
    {
        // The using blocks close all four objects even when reading or writing
        // throws; the writer is disposed first, which flushes it.
        using (FileStream input = new FileStream(source.FullName, FileMode.Open, FileAccess.Read))
        using (StreamReader reader = new StreamReader(input, encodingIn))
        using (FileStream output = new FileStream(targetPath, FileMode.CreateNew, FileAccess.Write))
        using (StreamWriter writer = new StreamWriter(output, encodingOut))
        {
            string line;
            while ((line = reader.ReadLine()) != null)
            {
                writer.WriteLine(line);
            }
        }
    }
}
}

Didn't find what you were looking for?

Ask your question

Ask a Question

731 491 924 answers to any question